From 379e5658a68032a2dc405472822932560f72c770 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Thu, 19 Feb 2026 16:16:58 +0100 Subject: [PATCH 001/127] Use hgvs_position_model dataclass for HGVS position results --- mutalyzer_crossmapper/hgvs_position_model.py | 40 ++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 mutalyzer_crossmapper/hgvs_position_model.py diff --git a/mutalyzer_crossmapper/hgvs_position_model.py b/mutalyzer_crossmapper/hgvs_position_model.py new file mode 100644 index 0000000..59915b7 --- /dev/null +++ b/mutalyzer_crossmapper/hgvs_position_model.py @@ -0,0 +1,40 @@ +""" +HGVS Position Model - + a dataclass object to bridge HGVS position component and Crossmapper outputs. +""" +from dataclasses import dataclass +from typing import Optional + +@dataclass +class HGVSPositionModel: + """ + Represent the position component of an HGVS variant description. + This model captures details necessary to describe the '[position]' part in an HGVS + description of the form + [reference sequence]:[sequence type].[position][variant type][change] + """ + position: int + offset: Optional[int] = None + region: Optional[str] = None + position_in_codon: Optional[int] = None + + + def __post_init__(self): + # validate position + if self.position <= 0: + raise ValueError("Position must be a positive integer.") + + # validate region + region_values = {"u", "-", "", "*", "d"} + if self.region is not None and self.region not in region_values: + raise ValueError( + f"Invalid region value: {self.region}. Allowed values are: {region_values}" + ) + + # validate position_in_codon + codon_values = {1, 2, 3} + if self.position_in_codon is not None and self.position_in_codon not in codon_values: + raise ValueError( + f"Invalid position in codon value: {self.position_in_codon}. 
" + f"Allowed values are: {codon_values}" + ) From bbd38dabbeeebf960d2012ff8e4a7788f5f71928 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Thu, 19 Feb 2026 18:20:30 +0100 Subject: [PATCH 002/127] Add function to convert from tuple to HGVSPositionModel --- mutalyzer_crossmapper/hgvs_position_model.py | 54 +++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/mutalyzer_crossmapper/hgvs_position_model.py b/mutalyzer_crossmapper/hgvs_position_model.py index 59915b7..7992c63 100644 --- a/mutalyzer_crossmapper/hgvs_position_model.py +++ b/mutalyzer_crossmapper/hgvs_position_model.py @@ -3,7 +3,7 @@ a dataclass object to bridge HGVS position component and Crossmapper outputs. """ from dataclasses import dataclass -from typing import Optional +from typing import Optional, Tuple @dataclass class HGVSPositionModel: @@ -38,3 +38,55 @@ def __post_init__(self): f"Invalid position in codon value: {self.position_in_codon}. " f"Allowed values are: {codon_values}" ) + + + # Convert from tuple to HGVSPositionModel + + #TODO: check for inverted and degerate options, now only support non-inverted and non-degenerate cases + @classmethod + def to_hgvs_position_model(cls, raw_tuple:Tuple): + """Convert crossmapper tuple to an HGVSPositionModel instance.""" + if not raw_tuple: + raise ValueError("Input tuple position cannot be empty.") + + # Genomic + if len(raw_tuple) == 1: + return cls(position=raw_tuple[0]) + # Non-coding + if len(raw_tuple) == 3: + pass + + # Coding + #(c_pos, offset, in_cds, offset_to_exon_boundary) + if len(raw_tuple) == 4: + c_pos, offset, cds, dis_to_exon_boundary = raw_tuple + region = cls._determine_region(cds, dis_to_exon_boundary) + return cls(position=c_pos, offset=offset, region=region) + + # Protein ( + if len(raw_tuple) == 5: + p_pos, codon_pos, offset, cds, dis_to_exon_boundary = raw_tuple + if cds == 0: # in CDS + return cls( + position=p_pos, + region="", + position_in_codon=codon_pos + ) + else: + # TODO: shall we support 
HGVSPositionModel outside of CDS for protein? + pass + + + @staticmethod + def _determine_region(cds, dis_to_exon_boundary): + if dis_to_exon_boundary < 0: + return "u" + elif dis_to_exon_boundary > 0: + return "d" + else: # in translation range, check if in CDS or not + if cds < 0: # before CDS + return "-" + elif cds > 0: # after CDS + return "*" + else: + return "" \ No newline at end of file From 5122c556840b7d143bb98f7ff5819a189a5df0ef Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 20 Feb 2026 11:36:05 +0100 Subject: [PATCH 003/127] Refactor(locus):return dict position model and update test --- mutalyzer_crossmapper/locus.py | 26 +++++++++++++------------- tests/test_locus.py | 32 ++++++++++++++++---------------- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/mutalyzer_crossmapper/locus.py b/mutalyzer_crossmapper/locus.py index 14a9d20..a692f61 100644 --- a/mutalyzer_crossmapper/locus.py +++ b/mutalyzer_crossmapper/locus.py @@ -11,32 +11,32 @@ def __init__(self, location, inverted=False): self._end = self.boundary[1] - self.boundary[0] def to_position(self, coordinate): - """Convert a coordinate to a proper position. + """Convert a coordinate to a proper position model. :arg int coordinate: Coordinate. - :returns tuple: Position. + :returns dict: Position model with 'position' and 'offset' keys. 
""" if self._inverted: if coordinate > self.boundary[1]: - return 0, self.boundary[1] - coordinate + return {"position": 0, "offset": self.boundary[1] - coordinate} if coordinate < self.boundary[0]: - return self._end, self.boundary[0] - coordinate - return self.boundary[1] - coordinate, 0 + return {"position": self._end, "offset": self.boundary[0] - coordinate} + return {"position": self.boundary[1] - coordinate, "offset": 0} - if coordinate < self.boundary[0]: - return 0, coordinate - self.boundary[0] - if coordinate > self.boundary[1]: - return self._end, coordinate - self.boundary[1] - return coordinate - self.boundary[0], 0 + if coordinate < self.boundary[0]: # upstream of an exon, re + return {"position": 0, "offset": coordinate - self.boundary[0]} + if coordinate > self.boundary[1]: # downstream of an exon + return {"position": self._end, "offset": coordinate - self.boundary[1]} + return {"position": coordinate - self.boundary[0], "offset": 0} def to_coordinate(self, position): """Convert a position to a coordinate. - :arg int position: Position. + :arg dict position: Position model with 'position' and 'offset' keys. :returns int: Coordinate. 
""" if self._inverted: - return self.boundary[1] - position[0] - position[1] - return self.boundary[0] + position[0] + position[1] + return self.boundary[1] - position["position"] - position["offset"] + return self.boundary[0] + position["position"] + position["offset"] diff --git a/tests/test_locus.py b/tests/test_locus.py index a873416..b650c1a 100644 --- a/tests/test_locus.py +++ b/tests/test_locus.py @@ -7,37 +7,37 @@ def test_Locus(): """Forward orientent Lovus.""" locus = Locus((30, 35)) - invariant(locus.to_position, 29, locus.to_coordinate, (0, -1)) - invariant(locus.to_position, 30, locus.to_coordinate, (0, 0)) - invariant(locus.to_position, 31, locus.to_coordinate, (1, 0)) - invariant(locus.to_position, 33, locus.to_coordinate, (3, 0)) - invariant(locus.to_position, 34, locus.to_coordinate, (4, 0)) - invariant(locus.to_position, 35, locus.to_coordinate, (4, 1)) + invariant(locus.to_position, 29, locus.to_coordinate, {"position": 0, "offset": -1}) + invariant(locus.to_position, 30, locus.to_coordinate, {"position": 0, "offset": 0}) + invariant(locus.to_position, 31, locus.to_coordinate, {"position": 1, "offset": 0}) + invariant(locus.to_position, 33, locus.to_coordinate, {"position": 3, "offset": 0}) + invariant(locus.to_position, 34, locus.to_coordinate, {"position": 4, "offset": 0}) + invariant(locus.to_position, 35, locus.to_coordinate, {"position": 4, "offset": 1}) def test_Locus_inverted(): """Reverse orientent Lovus.""" locus = Locus((30, 35), True) - invariant(locus.to_position, 35, locus.to_coordinate, (0, -1)) - invariant(locus.to_position, 34, locus.to_coordinate, (0, 0)) - invariant(locus.to_position, 33, locus.to_coordinate, (1, 0)) - invariant(locus.to_position, 31, locus.to_coordinate, (3, 0)) - invariant(locus.to_position, 30, locus.to_coordinate, (4, 0)) - invariant(locus.to_position, 29, locus.to_coordinate, (4, 1)) + invariant(locus.to_position, 35, locus.to_coordinate, {"position": 0, "offset": -1}) + invariant(locus.to_position, 34, 
locus.to_coordinate, {"position": 0, "offset": 0}) + invariant(locus.to_position, 33, locus.to_coordinate, {"position": 1, "offset": 0}) + invariant(locus.to_position, 31, locus.to_coordinate, {"position": 3, "offset": 0}) + invariant(locus.to_position, 30, locus.to_coordinate, {"position": 4, "offset": 0}) + invariant(locus.to_position, 29, locus.to_coordinate, {"position": 4, "offset": 1}) def test_Locus_degenerate(): """Degenerate positions are silently corrected.""" locus = Locus((10, 20)) - degenerate_equal(locus.to_coordinate, 9, [(0, -1), (-1, 0)]) - degenerate_equal(locus.to_coordinate, 20, [(9, 1), (10, 0)]) + degenerate_equal(locus.to_coordinate, 9, [{"position": 0, "offset": -1}, {"position": -1, "offset": 0}]) + degenerate_equal(locus.to_coordinate, 20, [{"position": 9, "offset": 1}, {"position": 10, "offset": 0}]) def test_Locus_inverted_degenerate(): """Degenerate positions are silently corrected.""" locus = Locus((10, 20), True) - degenerate_equal(locus.to_coordinate, 20, [(0, -1), (-1, 0)]) - degenerate_equal(locus.to_coordinate, 9, [(9, 1), (10, 0)]) + degenerate_equal(locus.to_coordinate, 20, [{"position": 0, "offset": -1}, {"position": -1, "offset": 0}]) + degenerate_equal(locus.to_coordinate, 9, [{"position": 9, "offset": 1}, {"position": 10, "offset": 0}]) From 836d7d3669561bba12e75494daf31c730717f222 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 20 Feb 2026 12:52:19 +0100 Subject: [PATCH 004/127] Refactor(multi_locus):return dict position model and update test --- mutalyzer_crossmapper/multi_locus.py | 30 ++++---- tests/test_multi_locus.py | 110 ++++++++++++++++++--------- 2 files changed, 88 insertions(+), 52 deletions(-) diff --git a/mutalyzer_crossmapper/multi_locus.py b/mutalyzer_crossmapper/multi_locus.py index 9836549..d1f65b8 100644 --- a/mutalyzer_crossmapper/multi_locus.py +++ b/mutalyzer_crossmapper/multi_locus.py @@ -19,7 +19,7 @@ def _offsets(locations, orientation): class MultiLocus(object): """MultiLocus object.""" - def 
__init__(self, locations, inverted=False): + def __init__(self, locations:list, inverted=False): """ :arg list locations: List of locus locations. :arg bool inverted: Orientation. @@ -36,7 +36,7 @@ def _direction(self, index): return len(self._offsets) - index - 1 return index - def outside(self, coordinate): + def outside(self, coordinate:int): """Calculate the offset relative to this MultiLocus. :arg int coordinate: Coordinate. @@ -49,32 +49,34 @@ def outside(self, coordinate): return coordinate - self._loci[-1].boundary[1] return 0 - def to_position(self, coordinate): + def to_position(self, coordinate:int): """Convert a coordinate to a position. :arg int coordinate: Coordinate. - :returns tuple: Position. + :returns dict: Position model. """ index = nearest_location(self._locations, coordinate, self._inverted) outside = self._orientation * self.outside(coordinate) + region = "u" if outside < 0 else "d" if outside > 0 else "" location = self._loci[index].to_position(coordinate) - return ( - location[0] + self._offsets[self._direction(index)], - location[1], - outside) + return {"position": location["position"] + self._offsets[self._direction(index)], + "offset": location["offset"], + "region": region} - def to_coordinate(self, position): - """Convert a position to a coordinate. + def to_coordinate(self, position_model:dict): + """Convert a position model to a coordinate. - :arg int position: Position. + :arg dict position: Position. :returns int: Coordinate. 
""" + offset_val = position_model["offset"] index = min( len(self._offsets), - max(0, bisect_right(self._offsets, position[0]) - 1)) - + max(0, bisect_right(self._offsets, position_model["position"]) - 1) + ) return self._loci[self._direction(index)].to_coordinate( - (position[0] - self._offsets[index], position[1])) + {"position": position_model["position"] - self._offsets[index], "offset": offset_val} + ) \ No newline at end of file diff --git a/tests/test_multi_locus.py b/tests/test_multi_locus.py index 6ce0013..1e3812a 100644 --- a/tests/test_multi_locus.py +++ b/tests/test_multi_locus.py @@ -1,3 +1,5 @@ +"""Tests for MultiLocus flattening and coordinate conversions.""" + from mutalyzer_crossmapper import MultiLocus from mutalyzer_crossmapper.multi_locus import _offsets @@ -15,7 +17,11 @@ def test_offsets_inverted(): """Cummulative location lengths for inverted list of locations.""" assert _offsets(_locations, -1) == [0, 2, 4, 8, 13, 19] - + invariant( + multi_locus.to_position, + 4, + multi_locus.to_coordinate, + {"position": 0, "offset": -1, "region": "u"}, def test_offsets_adjacent(): """Cummulative location lengths for adjacent locations.""" assert _offsets([(1, 3), (3, 5)], 1) == [0, 2] @@ -31,30 +37,32 @@ def test_MultiLocus(): multi_locus = MultiLocus(_locations) # Boundary between upstream and the first locus. + invariant(multi_locus.to_position, 4, multi_locus.to_coordinate, {"position": 0, "offset": -1, "region": "u"}, + ) + invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, (0, -1, -1)) - invariant( - multi_locus.to_position, 5, multi_locus.to_coordinate, (0, 0, 0)) + multi_locus.to_position, 5, multi_locus.to_coordinate, {"position": 0, "offset": 0, "region": ""}, + ) # Internal locus. 
invariant( - multi_locus.to_position, 29, multi_locus.to_coordinate, (9, -1, 0)) + multi_locus.to_position, 29, multi_locus.to_coordinate, {"position": 9, "offset": -1, "region": ""}) invariant( - multi_locus.to_position, 30, multi_locus.to_coordinate, (9, 0, 0)) + multi_locus.to_position, 30, multi_locus.to_coordinate, {"position": 9, "offset": 0, "region": ""}) invariant( - multi_locus.to_position, 31, multi_locus.to_coordinate, (10, 0, 0)) + multi_locus.to_position, 31, multi_locus.to_coordinate, {"position": 10, "offset": 0, "region": ""}) invariant( - multi_locus.to_position, 33, multi_locus.to_coordinate, (12, 0, 0)) + multi_locus.to_position, 33, multi_locus.to_coordinate, {"position": 12, "offset": 0, "region": ""}) invariant( - multi_locus.to_position, 34, multi_locus.to_coordinate, (13, 0, 0)) + multi_locus.to_position, 34, multi_locus.to_coordinate, {"position": 13, "offset": 0, "region": ""}) invariant( - multi_locus.to_position, 35, multi_locus.to_coordinate, (13, 1, 0)) + multi_locus.to_position, 35, multi_locus.to_coordinate, {"position": 13, "offset": 1, "region": ""}) # Boundary between the last locus and downstream. invariant( - multi_locus.to_position, 71, multi_locus.to_coordinate, (21, 0, 0)) + multi_locus.to_position, 71, multi_locus.to_coordinate, {"position": 21, "offset": 0, "region": ""}) invariant( - multi_locus.to_position, 72, multi_locus.to_coordinate, (21, 1, 1)) + multi_locus.to_position, 72, multi_locus.to_coordinate, {"position": 21, "offset": 1, "region": "d"}) def test_MultiLocus_inverted(): @@ -63,29 +71,29 @@ def test_MultiLocus_inverted(): # Boundary between upstream and the first locus. 
invariant( - multi_locus.to_position, 72, multi_locus.to_coordinate, (0, -1, -1)) + multi_locus.to_position, 72, multi_locus.to_coordinate, {"position": 0, "offset": -1, "region": "u"}) invariant( - multi_locus.to_position, 71, multi_locus.to_coordinate, (0, 0, 0)) + multi_locus.to_position, 71, multi_locus.to_coordinate, {"position": 0, "offset": 0, "region": ""}) # Internal locus. invariant( - multi_locus.to_position, 35, multi_locus.to_coordinate, (8, -1, 0)) + multi_locus.to_position, 35, multi_locus.to_coordinate, {"position": 8, "offset": -1, "region": ""}) invariant( - multi_locus.to_position, 34, multi_locus.to_coordinate, (8, 0, 0)) + multi_locus.to_position, 34, multi_locus.to_coordinate, {"position": 8, "offset": 0, "region": ""}) invariant( - multi_locus.to_position, 33, multi_locus.to_coordinate, (9, 0, 0)) + multi_locus.to_position, 33, multi_locus.to_coordinate, {"position": 9, "offset": 0, "region": ""}) invariant( - multi_locus.to_position, 31, multi_locus.to_coordinate, (11, 0, 0)) + multi_locus.to_position, 31, multi_locus.to_coordinate, {"position": 11, "offset": 0, "region": ""}) invariant( - multi_locus.to_position, 30, multi_locus.to_coordinate, (12, 0, 0)) + multi_locus.to_position, 30, multi_locus.to_coordinate, {"position": 12, "offset": 0, "region": ""}) invariant( - multi_locus.to_position, 29, multi_locus.to_coordinate, (12, 1, 0)) + multi_locus.to_position, 29, multi_locus.to_coordinate, {"position": 12, "offset": 1, "region": ""}) # Boundary between the last locus and downstream. 
invariant( - multi_locus.to_position, 5, multi_locus.to_coordinate, (21, 0, 0)) + multi_locus.to_position, 5, multi_locus.to_coordinate, {"position": 21, "offset": 0, "region": ""}) invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, (21, 1, 1)) + multi_locus.to_position, 4, multi_locus.to_coordinate, {"position": 21, "offset": 1, "region": "d"}) def test_MultiLocus_adjacent_loci(): @@ -93,9 +101,9 @@ def test_MultiLocus_adjacent_loci(): multi_locus = MultiLocus([(1, 3), (3, 5)]) invariant( - multi_locus.to_position, 2, multi_locus.to_coordinate, (1, 0, 0)) + multi_locus.to_position, 2, multi_locus.to_coordinate, {"position": 1, "offset": 0, "region": ""}) invariant( - multi_locus.to_position, 3, multi_locus.to_coordinate, (2, 0, 0)) + multi_locus.to_position, 3, multi_locus.to_coordinate, {"position": 2, "offset": 0, "region": ""}) def test_MultiLocus_adjacent_loci_inverted(): @@ -103,9 +111,9 @@ def test_MultiLocus_adjacent_loci_inverted(): multi_locus = MultiLocus([(1, 3), (3, 5)], True) invariant( - multi_locus.to_position, 3, multi_locus.to_coordinate, (1, 0, 0)) + multi_locus.to_position, 3, multi_locus.to_coordinate, {"position": 1, "offset": 0, "region": ""}) invariant( - multi_locus.to_position, 2, multi_locus.to_coordinate, (2, 0, 0)) + multi_locus.to_position, 2, multi_locus.to_coordinate, {"position": 2, "offset": 0, "region": ""}) def test_MultiLocus_offsets_odd(): @@ -113,9 +121,9 @@ def test_MultiLocus_offsets_odd(): multi_locus = MultiLocus([(1, 3), (6, 8)]) invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, (1, 2, 0)) + multi_locus.to_position, 4, multi_locus.to_coordinate, {"position": 1, "offset": 2, "region": ""}) invariant( - multi_locus.to_position, 5, multi_locus.to_coordinate, (2, -1, 0)) + multi_locus.to_position, 5, multi_locus.to_coordinate, {"position": 2, "offset": -1, "region": ""}) def test_MultiLocus_offsets_odd_inverted(): @@ -123,9 +131,9 @@ def test_MultiLocus_offsets_odd_inverted(): multi_locus = 
MultiLocus([(1, 3), (6, 8)], True) invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, (1, 2, 0)) + multi_locus.to_position, 4, multi_locus.to_coordinate, {"position": 1, "offset": 2, "region": ""}) invariant( - multi_locus.to_position, 3, multi_locus.to_coordinate, (2, -1, 0)) + multi_locus.to_position, 3, multi_locus.to_coordinate, {"position": 2, "offset": -1, "region": ""}) def test_MultiLocus_offsets_even(): @@ -133,9 +141,9 @@ def test_MultiLocus_offsets_even(): multi_locus = MultiLocus([(1, 3), (7, 9)]) invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, (1, 2, 0)) + multi_locus.to_position, 4, multi_locus.to_coordinate, {"position": 1, "offset": 2, "region": ""}) invariant( - multi_locus.to_position, 5, multi_locus.to_coordinate, (2, -2, 0)) + multi_locus.to_position, 5, multi_locus.to_coordinate, {"position": 2, "offset": -2, "region": ""}) def test_MultiLocus_offsets_even_inverted(): @@ -143,9 +151,9 @@ def test_MultiLocus_offsets_even_inverted(): multi_locus = MultiLocus([(1, 3), (7, 9)], True) invariant( - multi_locus.to_position, 5, multi_locus.to_coordinate, (1, 2, 0)) + multi_locus.to_position, 5, multi_locus.to_coordinate, {"position": 1, "offset": 2, "region": ""}) invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, (2, -2, 0)) + multi_locus.to_position, 4, multi_locus.to_coordinate, {"position": 2, "offset": -2, "region": ""}) def test_MultiLocus_degenerate(): @@ -153,9 +161,22 @@ def test_MultiLocus_degenerate(): multi_locus = MultiLocus(_locations) degenerate_equal( - multi_locus.to_coordinate, 4, [(0, -1, -1), (-1, 0, -1)]) + multi_locus.to_coordinate, + 4, + [ + {"position": 0, "offset": -1, "region": "u"}, + {"position": -1, "offset": 0, "region": "u"}, + ], + ) + degenerate_equal( - multi_locus.to_coordinate, 72, [(21, 1, 1), (22, 0, 1)]) + multi_locus.to_coordinate, + 72, + [ + {"position": 21, "offset": 1, "region": "d"}, + {"position": 22, "offset": 0, "region": "d"}, + ], + ) def 
test_MultiLocus_inverted_degenerate(): @@ -163,6 +184,19 @@ def test_MultiLocus_inverted_degenerate(): multi_locus = MultiLocus(_locations, True) degenerate_equal( - multi_locus.to_coordinate, 72, [(0, -1, -1), (-1, 0, -1)]) + multi_locus.to_coordinate, + 72, + [ + {"position": 0, "offset": -1, "region": "u"}, + {"position": -1, "offset": 0, "region": "u"}, + ], + ) + degenerate_equal( - multi_locus.to_coordinate, 4, [(21, 1, 1), (22, 0, 1)]) + multi_locus.to_coordinate, + 4, + [ + {"position": 21, "offset": 1, "region": "d"}, + {"position": 22, "offset": 0, "region": "d"}, + ], + ) From 89cdba7fb14545df896b8f20f049cbd4ed931d2e Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 20 Feb 2026 12:56:20 +0100 Subject: [PATCH 005/127] Fix typo --- tests/test_multi_locus.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/test_multi_locus.py b/tests/test_multi_locus.py index 1e3812a..a0eea27 100644 --- a/tests/test_multi_locus.py +++ b/tests/test_multi_locus.py @@ -1,5 +1,3 @@ -"""Tests for MultiLocus flattening and coordinate conversions.""" - from mutalyzer_crossmapper import MultiLocus from mutalyzer_crossmapper.multi_locus import _offsets @@ -17,11 +15,6 @@ def test_offsets_inverted(): """Cummulative location lengths for inverted list of locations.""" assert _offsets(_locations, -1) == [0, 2, 4, 8, 13, 19] - invariant( - multi_locus.to_position, - 4, - multi_locus.to_coordinate, - {"position": 0, "offset": -1, "region": "u"}, def test_offsets_adjacent(): """Cummulative location lengths for adjacent locations.""" assert _offsets([(1, 3), (3, 5)], 1) == [0, 2] From 2d4e2eb292ea52b45277ba041609841481ab659a Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 20 Feb 2026 14:11:11 +0100 Subject: [PATCH 006/127] Refactor(crossmapper): convert Genomic to dict-based position model --- mutalyzer_crossmapper/crossmapper.py | 11 ++++++----- tests/test_crossmapper.py | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git 
a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 537efd3..def779b 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -1,3 +1,4 @@ +from turtle import position from .multi_locus import MultiLocus @@ -8,18 +9,18 @@ def coordinate_to_genomic(self, coordinate): :arg int coordinate: Coordinate. - :returns int: Genomic position. + :returns dict: Genomic position. """ - return coordinate + 1 + return {"position": coordinate + 1} - def genomic_to_coordinate(self, position): + def genomic_to_coordinate(self, position_m): """Convert a genomic position (g./m./o.) to a coordinate. - :arg int position: Genomic position. + :arg int position: Genomic position model. :returns int: Coordinate. """ - return position - 1 + return position_m["position"] - 1 class NonCoding(Genomic): diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index dbe6b47..1d019fb 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -11,9 +11,9 @@ def test_Genomic(): crossmap = Genomic() invariant( - crossmap.coordinate_to_genomic, 0, crossmap.genomic_to_coordinate, 1) + crossmap.coordinate_to_genomic, 0, crossmap.genomic_to_coordinate, {"position": 1}) invariant( - crossmap.coordinate_to_genomic, 98, crossmap.genomic_to_coordinate, 99) + crossmap.coordinate_to_genomic, 98, crossmap.genomic_to_coordinate, {"position": 99}) def test_NonCoding(): From 923cd9fb406b3ee916b17a87ed2a11d6cbc85285 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 20 Feb 2026 17:44:20 +0100 Subject: [PATCH 007/127] Refactor(crossmapper): convert NonCoding to dict-based position model --- mutalyzer_crossmapper/crossmapper.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index def779b..07285b7 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -1,4 +1,3 @@ 
-from turtle import position from .multi_locus import MultiLocus @@ -39,23 +38,24 @@ def coordinate_to_noncoding(self, coordinate): :arg int coordinate: Coordinate. - :returns tuple: Noncoding position. + :returns dict: Noncoding position model. """ - pos = self._noncoding.to_position(coordinate) - - return pos[0] + 1, pos[1], pos[2] + pos_m = self._noncoding.to_position(coordinate) + if pos_m["region"] == "": + pos_m["position"] = pos_m["position"] + 1 + return pos_m - def noncoding_to_coordinate(self, position): + def noncoding_to_coordinate(self, position_m): """Convert a noncoding position (n./r.) to a coordinate. - :arg tuple position: Noncoding position. + :arg dict postion_m: Noncoding position model. :returns int: Coordinate. """ - if position[0] > 0: - return self._noncoding.to_coordinate( - (position[0] - 1, position[1])) - return self._noncoding.to_coordinate(position) + if position_m["region"] == "": + # if position_m["position"] > 0: + position_m["position"] = position_m["position"] - 1 + return self._noncoding.to_coordinate(position_m) class Coding(NonCoding): From 259481b05d519ebf4df106b2c8112c899ca4e231 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 20 Feb 2026 17:49:41 +0100 Subject: [PATCH 008/127] Refactor (locus, multi_locus): allow 'u', 'd' for UTR areas in hgvs model --- mutalyzer_crossmapper/locus.py | 17 +++++++++--- mutalyzer_crossmapper/multi_locus.py | 39 ++++++++++++++++++---------- 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/mutalyzer_crossmapper/locus.py b/mutalyzer_crossmapper/locus.py index a692f61..7e26235 100644 --- a/mutalyzer_crossmapper/locus.py +++ b/mutalyzer_crossmapper/locus.py @@ -30,13 +30,22 @@ def to_position(self, coordinate): return {"position": self._end, "offset": coordinate - self.boundary[1]} return {"position": coordinate - self.boundary[0], "offset": 0} - def to_coordinate(self, position): - """Convert a position to a coordinate. 
+ def to_coordinate(self, position_m): + """Convert a position model to a coordinate. :arg dict position: Position model with 'position' and 'offset' keys. :returns int: Coordinate. """ if self._inverted: - return self.boundary[1] - position["position"] - position["offset"] - return self.boundary[0] + position["position"] + position["offset"] + if position_m["region"] == "u": + return self.boundary[1] + position_m["position"] + elif position_m["region"] == "d": + return self.boundary[0] - position_m["position"] + return self.boundary[1] - position_m["position"] - position_m["offset"] + if position_m["region"] == "u": + return self.boundary[0] - position_m["position"] + elif position_m["region"] == "d": + return self.boundary[1] + position_m["position"] + else: + return self.boundary[0] + position_m["position"] + position_m["offset"] diff --git a/mutalyzer_crossmapper/multi_locus.py b/mutalyzer_crossmapper/multi_locus.py index d1f65b8..a7f1ca4 100644 --- a/mutalyzer_crossmapper/multi_locus.py +++ b/mutalyzer_crossmapper/multi_locus.py @@ -60,23 +60,34 @@ def to_position(self, coordinate:int): outside = self._orientation * self.outside(coordinate) region = "u" if outside < 0 else "d" if outside > 0 else "" location = self._loci[index].to_position(coordinate) - - return {"position": location["position"] + self._offsets[self._direction(index)], - "offset": location["offset"], - "region": region} - - def to_coordinate(self, position_model:dict): + if not outside: + return { + "position": location["position"] + self._offsets[self._direction(index)], + "offset": location["offset"], + "region": region} + else: + return { + "position": abs(self._offsets[self._direction(index)] - self._offsets[self._direction(index)] + 1), + "offset": 0, + "region":region + } + + def to_coordinate(self, position_m:dict): """Convert a position model to a coordinate. :arg dict position: Position. :returns int: Coordinate. 
""" - offset_val = position_model["offset"] - index = min( - len(self._offsets), - max(0, bisect_right(self._offsets, position_model["position"]) - 1) - ) - return self._loci[self._direction(index)].to_coordinate( - {"position": position_model["position"] - self._offsets[index], "offset": offset_val} - ) \ No newline at end of file + if position_m["region"] == "": + index = min( + len(self._offsets), + max(0, bisect_right(self._offsets, position_m["position"]) - 1) + ) + position_m["position"] = position_m["position"] - self._offsets[index] + + elif position_m["region"] == "u": + index = 0 + else: # "d" + index = len(self._offsets) -1 + return self._loci[self._direction(index)].to_coordinate(position_m) From 55fd346994bb8731eb854d03898ae99c3ca72858 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 20 Feb 2026 17:50:30 +0100 Subject: [PATCH 009/127] Refactor test modules for Genomic and NonCoding --- tests/test_crossmapper.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index 1d019fb..ef68a48 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -23,18 +23,18 @@ def test_NonCoding(): # Boundary between upstream and transcript. invariant( crossmap.coordinate_to_noncoding, 4, - crossmap.noncoding_to_coordinate, (1, -1, -1)) + crossmap.noncoding_to_coordinate, {"position": 1, "offset": 0, "region":"u"}) invariant( crossmap.coordinate_to_noncoding, 5, - crossmap.noncoding_to_coordinate, (1, 0, 0)) + crossmap.noncoding_to_coordinate, {"position": 1, "offset": 0, "region": ""}) # Boundary between downstream and transcript. 
invariant( crossmap.coordinate_to_noncoding, 71, - crossmap.noncoding_to_coordinate, (22, 0, 0)) + crossmap.noncoding_to_coordinate, {"position": 22, "offset": 0, "region": ""}) invariant( crossmap.coordinate_to_noncoding, 72, - crossmap.noncoding_to_coordinate, (22, 1, 1)) + crossmap.noncoding_to_coordinate, {"position": 1, "offset": 0, "region": "d"}) def test_NonCoding_inverted(): @@ -44,18 +44,18 @@ def test_NonCoding_inverted(): # Boundary between upstream and transcript. invariant( crossmap.coordinate_to_noncoding, 72, - crossmap.noncoding_to_coordinate, (1, -1, -1)) + crossmap.noncoding_to_coordinate, {"position": 1, "offset": 0, "region": "u"}) invariant( crossmap.coordinate_to_noncoding, 71, - crossmap.noncoding_to_coordinate, (1, 0, 0)) + crossmap.noncoding_to_coordinate, {"position": 1, "offset": 0, "region": ""}) # Boundary between downstream and transcript. invariant( crossmap.coordinate_to_noncoding, 5, - crossmap.noncoding_to_coordinate, (22, 0, 0)) + crossmap.noncoding_to_coordinate, {"position": 22, "offset": 0, "region": ""}) invariant( crossmap.coordinate_to_noncoding, 4, - crossmap.noncoding_to_coordinate, (22, 1, 1)) + crossmap.noncoding_to_coordinate, {"position": 1, "offset": 0, "region": "d"}) def test_NonCoding_degenerate(): @@ -65,12 +65,12 @@ def test_NonCoding_degenerate(): # Boundary between upstream and transcript. degenerate_equal( crossmap.noncoding_to_coordinate, 4, - [(1, -1, -1), (-1, 0, -1)]) + [{"position": 1, "offset": 0, "region":"u"}]) # Boundary between downstream and transcript. degenerate_equal( crossmap.noncoding_to_coordinate, 72, - [(22, 1, 1), (23, 0, 1)]) + [{"position": 1, "offset": 0, "region": "d"}]) def test_NonCoding_inverted_degenerate(): @@ -80,12 +80,12 @@ def test_NonCoding_inverted_degenerate(): # Boundary between upstream and transcript. 
degenerate_equal( crossmap.noncoding_to_coordinate, 72, - [(1, -1, -1), (-1, 0, -1)]) + [{"position": 1, "offset": 0, "region": "u"}]) # Boundary between downstream and transcript. degenerate_equal( crossmap.noncoding_to_coordinate, 4, - [(22, 1, 1), (23, 0, 1)]) + [{"position": 1 , "offset": 0, "region": "d"}]) def test_Coding(): From 84127e5aabeca1609c94febd90af7201957b3fb2 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Thu, 26 Feb 2026 09:53:56 +0100 Subject: [PATCH 010/127] Remove 'region' in locus position model --- mutalyzer_crossmapper/locus.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/mutalyzer_crossmapper/locus.py b/mutalyzer_crossmapper/locus.py index 7e26235..a531ecb 100644 --- a/mutalyzer_crossmapper/locus.py +++ b/mutalyzer_crossmapper/locus.py @@ -24,7 +24,7 @@ def to_position(self, coordinate): return {"position": self._end, "offset": self.boundary[0] - coordinate} return {"position": self.boundary[1] - coordinate, "offset": 0} - if coordinate < self.boundary[0]: # upstream of an exon, re + if coordinate < self.boundary[0]: # upstream of an exon return {"position": 0, "offset": coordinate - self.boundary[0]} if coordinate > self.boundary[1]: # downstream of an exon return {"position": self._end, "offset": coordinate - self.boundary[1]} @@ -38,14 +38,5 @@ def to_coordinate(self, position_m): :returns int: Coordinate. 
""" if self._inverted: - if position_m["region"] == "u": - return self.boundary[1] + position_m["position"] - elif position_m["region"] == "d": - return self.boundary[0] - position_m["position"] return self.boundary[1] - position_m["position"] - position_m["offset"] - if position_m["region"] == "u": - return self.boundary[0] - position_m["position"] - elif position_m["region"] == "d": - return self.boundary[1] + position_m["position"] - else: - return self.boundary[0] + position_m["position"] + position_m["offset"] + return self.boundary[0] + position_m["position"] + position_m["offset"] From eb6ce9b3d393e4089cbd368edfee720d2b150aaa Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Thu, 26 Feb 2026 10:01:09 +0100 Subject: [PATCH 011/127] Refactor(multi_locus): replace tuple with dict --- mutalyzer_crossmapper/multi_locus.py | 42 ++++++++++++++++++++-------- tests/test_multi_locus.py | 8 +++--- 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/mutalyzer_crossmapper/multi_locus.py b/mutalyzer_crossmapper/multi_locus.py index a7f1ca4..1e3f72a 100644 --- a/mutalyzer_crossmapper/multi_locus.py +++ b/mutalyzer_crossmapper/multi_locus.py @@ -60,16 +60,31 @@ def to_position(self, coordinate:int): outside = self._orientation * self.outside(coordinate) region = "u" if outside < 0 else "d" if outside > 0 else "" location = self._loci[index].to_position(coordinate) - if not outside: + # UTR + if outside: + return { + "position": abs(location["offset"]), + "offset": 0, + "region": region + } + # in exons + if location["offset"] == 0: # in an exon return { "position": location["position"] + self._offsets[self._direction(index)], - "offset": location["offset"], - "region": region} - else: - return { - "position": abs(self._offsets[self._direction(index)] - self._offsets[self._direction(index)] + 1), "offset": 0, - "region":region + "region": "" + } + elif location["offset"] < 0: # before an exon + return { + "position": self._offsets[self._direction(index)], + 
"offset": location["offset"], + "region": "" + } + else: # after an exon + return{ + "position": location["position"] + self._offsets[self._direction(index)], + "offset": location["offset"], + "region": "" } def to_coordinate(self, position_m:dict): @@ -85,9 +100,14 @@ def to_coordinate(self, position_m:dict): max(0, bisect_right(self._offsets, position_m["position"]) - 1) ) position_m["position"] = position_m["position"] - self._offsets[index] + return self._loci[self._direction(index)].to_coordinate(position_m) elif position_m["region"] == "u": - index = 0 - else: # "d" - index = len(self._offsets) -1 - return self._loci[self._direction(index)].to_coordinate(position_m) + if self._inverted: + return position_m["position"] + self._locations[-1][1] - 1 + return self._locations[0][0] - position_m["position"] + + else: # d + if self._inverted: + return self._locations[0][0] - position_m["position"] + return position_m["position"] + self._locations[-1][1] - 1 diff --git a/tests/test_multi_locus.py b/tests/test_multi_locus.py index a0eea27..dcd18ef 100644 --- a/tests/test_multi_locus.py +++ b/tests/test_multi_locus.py @@ -30,7 +30,7 @@ def test_MultiLocus(): multi_locus = MultiLocus(_locations) # Boundary between upstream and the first locus. - invariant(multi_locus.to_position, 4, multi_locus.to_coordinate, {"position": 0, "offset": -1, "region": "u"}, + invariant(multi_locus.to_position, 4, multi_locus.to_coordinate, {"position": 1, "offset": 0, "region": "u"}, ) invariant( @@ -55,7 +55,7 @@ def test_MultiLocus(): invariant( multi_locus.to_position, 71, multi_locus.to_coordinate, {"position": 21, "offset": 0, "region": ""}) invariant( - multi_locus.to_position, 72, multi_locus.to_coordinate, {"position": 21, "offset": 1, "region": "d"}) + multi_locus.to_position, 72, multi_locus.to_coordinate, {"position": 1, "offset": 0, "region": "d"}) def test_MultiLocus_inverted(): @@ -64,7 +64,7 @@ def test_MultiLocus_inverted(): # Boundary between upstream and the first locus. 
invariant( - multi_locus.to_position, 72, multi_locus.to_coordinate, {"position": 0, "offset": -1, "region": "u"}) + multi_locus.to_position, 72, multi_locus.to_coordinate, {"position": 1, "offset": 0, "region": "u"}) invariant( multi_locus.to_position, 71, multi_locus.to_coordinate, {"position": 0, "offset": 0, "region": ""}) @@ -86,7 +86,7 @@ def test_MultiLocus_inverted(): invariant( multi_locus.to_position, 5, multi_locus.to_coordinate, {"position": 21, "offset": 0, "region": ""}) invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, {"position": 21, "offset": 1, "region": "d"}) + multi_locus.to_position, 4, multi_locus.to_coordinate, {"position": 1, "offset": 0, "region": "d"}) def test_MultiLocus_adjacent_loci(): From 7d7bc9bd56dcb9ba04125b729a2e92738ed4aba7 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Thu, 26 Feb 2026 10:30:27 +0100 Subject: [PATCH 012/127] Refactor crossmapper and tests, allow ''/*/- in position model for Coding class --- mutalyzer_crossmapper/crossmapper.py | 86 ++++++++++++---- tests/test_crossmapper.py | 145 ++++++++++++++++++++------- 2 files changed, 173 insertions(+), 58 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 07285b7..8efa979 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -53,7 +53,6 @@ def noncoding_to_coordinate(self, position_m): :returns int: Coordinate. 
""" if position_m["region"] == "": - # if position_m["position"] > 0: position_m["position"] = position_m["position"] - 1 return self._noncoding.to_coordinate(position_m) @@ -72,11 +71,11 @@ def __init__(self, locations, cds, inverted=False): b1 = self._noncoding.to_position(cds[1]) if self._inverted: - self._coding = (b1[0] + b1[1] + 1, b0[0] + b0[1] + 1) - self._cds_len = (b0[0] + b0[1]) - (b1[0] + b1[1]) + self._coding = (b1["position"] + b1["offset"] + 1, b0["position"] + b0["offset"] + 1) + self._cds_len = (b0["position"] + b0["offset"]) - (b1["position"] + b1["offset"]) else: - self._coding = (b0[0] + b0[1], b1[0] + b1[1]) - self._cds_len = (b1[0] + b1[1]) - (b0[0] + b0[1]) + self._coding = (b0["position"] + b0["offset"], b1["position"] + b1["offset"]) + self._cds_len = (b1["position"] + b1["offset"]) - (b0["position"] + b0["offset"]) def _coordinate_to_coding(self, coordinate): """Convert a coordinate to a coding position (c./r.). @@ -85,13 +84,31 @@ def _coordinate_to_coding(self, coordinate): :returns tuple: Coding position (c./r.). 
""" - pos = self._noncoding.to_position(coordinate) - - if pos[0] < self._coding[0]: - return pos[0] - self._coding[0], pos[1], -1, pos[2] - elif pos[0] >= self._coding[1]: - return pos[0] - self._coding[1] + 1, pos[1], 1, pos[2] - return pos[0] - self._coding[0] + 1, pos[1], 0, pos[2] + noncoding_pos = self._noncoding.to_position(coordinate) + + # on top of the noncoding position model, add CDs info + location = noncoding_pos["position"] + if noncoding_pos["region"] == "": + if location < self._coding[0]: # before CDs + return { + "position": self._coding[0] - location, + "offset": noncoding_pos["offset"], + "region": "-" + } + elif location >= self._coding[1]: # after CDs + return { + "position": location - self._coding[1] + 1, + "offset": noncoding_pos["offset"], + "region": "*" + } + else: + return { + "position": location - self._coding[0] + 1, + "offset": noncoding_pos["offset"], + "region": "" + } + else: + return noncoding_pos def coordinate_to_coding(self, coordinate, degenerate=False): """Convert a coordinate to a coding position (c./r.). @@ -113,21 +130,46 @@ def coordinate_to_coding(self, coordinate, degenerate=False): return pos - def coding_to_coordinate(self, position): + def coding_to_coordinate(self, pos_m): """Convert a coding position (c./r.) to a coordinate. :arg tuple position: Coding position (c./r.). :returns int: Coordinate. 
""" - if position[2] == -1: - return self._noncoding.to_coordinate( - (position[0] + self._coding[0], position[1])) - elif position[2] == 1: - return self._noncoding.to_coordinate( - (position[0] + self._coding[1] - 1, position[1])) - return self._noncoding.to_coordinate( - (position[0] + self._coding[0] - 1, position[1])) + region = pos_m["region"] + if region == "u": + noncoding_pos = { + "position": pos_m["position"], + "offset": 0, + "region": "u" + } + elif region == "d": + noncoding_pos = { + "position": pos_m["position"], + "offset": 0, + "region": "d" + } + elif region == "": + noncoding_pos = { + "position": pos_m["position"] + self._coding[0] -1, + "offset": pos_m["offset"], + "region": "" + } + elif region == "-": + noncoding_pos = { + "position": self._coding[0] - pos_m["position"], + "offset": pos_m["offset"], + "region": "" + } + else: # * + noncoding_pos = { + "position": self._coding[1] + pos_m["position"] - 1, + "offset": pos_m["offset"], + "region": "" + } + return self._noncoding.to_coordinate(noncoding_pos) + def coordinate_to_protein(self, coordinate): """Convert a coordinate to a protein position (p.). @@ -138,7 +180,7 @@ def coordinate_to_protein(self, coordinate): """ pos = self.coordinate_to_coding(coordinate) - if pos[2] == -1: + if pos[2] == -1: # before CDs return (pos[0] // 3, pos[0] % 3 + 1, *pos[1:]) return ((pos[0] + 2) // 3, (pos[0] + 2) % 3 + 1, *pos[1:]) diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index ef68a48..e2080ac 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -95,18 +95,37 @@ def test_Coding(): # Boundary between 5' and CDS. 
invariant( crossmap.coordinate_to_coding, 31, - crossmap.coding_to_coordinate, (-1, 0, -1, 0)) + crossmap.coding_to_coordinate, + {"position": 1, + "offset":0, + "region":"-" + } + ) invariant( crossmap.coordinate_to_coding, 32, - crossmap.coding_to_coordinate, (1, 0, 0, 0)) + crossmap.coding_to_coordinate, + {"position": 1, + "offset":0, + "region":"" + } + ) # Boundary between CDS and 3'. invariant( crossmap.coordinate_to_coding, 42, - crossmap.coding_to_coordinate, (6, 0, 0, 0)) + crossmap.coding_to_coordinate, + {"position": 6, + "offset":0, + "region":"" + }) invariant( crossmap.coordinate_to_coding, 43, - crossmap.coding_to_coordinate, (1, 0, 1, 0)) + crossmap.coding_to_coordinate, + {"position": 1, + "offset":0, + "region":"*" + } + ) def test_Coding_inverted(): @@ -116,18 +135,42 @@ def test_Coding_inverted(): # Boundary between 5' and CDS. invariant( crossmap.coordinate_to_coding, 43, - crossmap.coding_to_coordinate, (-1, 0, -1, 0)) + crossmap.coding_to_coordinate, + { + "position": 1, + "offset": 0, + "region": "-" + } + ) invariant( crossmap.coordinate_to_coding, 42, - crossmap.coding_to_coordinate, (1, 0, 0, 0)) + crossmap.coding_to_coordinate, + { + "position": 1, + "offset": 0, + "region": "" + } + ) # Boundary between CDS and 3'. invariant( crossmap.coordinate_to_coding, 32, - crossmap.coding_to_coordinate, (6, 0, 0, 0)) + crossmap.coding_to_coordinate, + { + "position": 6, + "offset": 0, + "region": "" + } + ) invariant( crossmap.coordinate_to_coding, 31, - crossmap.coding_to_coordinate, (1, 0, 1, 0)) + crossmap.coding_to_coordinate, + { + "position": 1, + "offset": 0, + "region": "*" + } + ) def test_Coding_regions(): @@ -137,18 +180,22 @@ def test_Coding_regions(): # Upstream odd length intron between two regions. 
invariant( crossmap.coordinate_to_coding, 25, - crossmap.coding_to_coordinate, (-1, 5, -1, 0)) + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 5, 'region': '-'}) invariant( crossmap.coordinate_to_coding, 26, - crossmap.coding_to_coordinate, (1, -4, 0, 0)) + crossmap.coding_to_coordinate, + {'position': 1, 'offset': -4, 'region': ''}) # Downstream odd length intron between two regions. invariant( crossmap.coordinate_to_coding, 44, - crossmap.coding_to_coordinate, (10, 5, 0, 0)) + crossmap.coding_to_coordinate, + {'position': 10, 'offset': 5, 'region': ''}) invariant( crossmap.coordinate_to_coding, 45, - crossmap.coding_to_coordinate, (1, -4, 1, 0)) + crossmap.coding_to_coordinate, + {'position': 1, 'offset': -4, 'region': '*'}) def test_Coding_regions_inverted(): @@ -158,18 +205,22 @@ def test_Coding_regions_inverted(): # Upstream odd length intron between two regions. invariant( crossmap.coordinate_to_coding, 44, - crossmap.coding_to_coordinate, (-1, 5, -1, 0)) + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 5, 'region': '-'}) invariant( crossmap.coordinate_to_coding, 43, - crossmap.coding_to_coordinate, (1, -4, 0, 0)) + crossmap.coding_to_coordinate, + {'position': 1, 'offset': -4, 'region': ''}) # Downstream odd length intron between two regions. invariant( crossmap.coordinate_to_coding, 25, - crossmap.coding_to_coordinate, (10, 5, 0, 0)) + crossmap.coding_to_coordinate, + {'position': 10, 'offset': 5, 'region': ''}) invariant( crossmap.coordinate_to_coding, 24, - crossmap.coding_to_coordinate, (1, -4, 1, 0)) + crossmap.coding_to_coordinate, + {'position': 1, 'offset': -4, 'region': '*'}) def test_Coding_no_utr5(): @@ -179,10 +230,12 @@ def test_Coding_no_utr5(): # Direct transition from upstream to CDS. 
invariant( crossmap.coordinate_to_coding, 9, - crossmap.coding_to_coordinate, (1, -1, 0, -1)) + crossmap.coding_to_coordinate, #(1, -1, 0, -1) + {'position': 1, 'offset': 0, 'region': 'u'}) invariant( crossmap.coordinate_to_coding, 10, - crossmap.coding_to_coordinate, (1, 0, 0, 0)) + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': ''}) def test_Coding_no_utr5_inverted(): @@ -192,10 +245,12 @@ def test_Coding_no_utr5_inverted(): # Direct transition from upstream to CDS. invariant( crossmap.coordinate_to_coding, 20, - crossmap.coding_to_coordinate, (1, -1, 0, -1)) + crossmap.coding_to_coordinate, #(1, -1, 0, -1) + {'position': 1, 'offset': 0, 'region': 'u'}) invariant( crossmap.coordinate_to_coding, 19, - crossmap.coding_to_coordinate, (1, 0, 0, 0)) + crossmap.coding_to_coordinate, + {'position': 2, 'offset': 0, 'region': '-'}) def test_Coding_no_utr3(): @@ -203,12 +258,15 @@ def test_Coding_no_utr3(): crossmap = Coding([(10, 20)], (15, 20)) # Direct transition from CDS to downstream. + #TODO: invariant( crossmap.coordinate_to_coding, 19, - crossmap.coding_to_coordinate, (5, 0, 0, 0)) + crossmap.coding_to_coordinate, + {'position': 9, 'offset': 0, 'region': '*'}) invariant( crossmap.coordinate_to_coding, 20, - crossmap.coding_to_coordinate, (5, 1, 0, 1)) + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': 'd'}) def test_Coding_no_utr3_inverted(): @@ -218,10 +276,12 @@ def test_Coding_no_utr3_inverted(): # Direct transition from CDS to downstream. invariant( crossmap.coordinate_to_coding, 10, - crossmap.coding_to_coordinate, (5, 0, 0, 0)) + crossmap.coding_to_coordinate, + {'position': 5, 'offset': 0, 'region': ''}) invariant( crossmap.coordinate_to_coding, 9, - crossmap.coding_to_coordinate, (5, 1, 0, 1)) + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': 'd'}) def test_Coding_small_utr5(): @@ -231,13 +291,17 @@ def test_Coding_small_utr5(): # Transition from upstream to 5' UTR to CDS. 
invariant( crossmap.coordinate_to_coding, 9, - crossmap.coding_to_coordinate, (-1, -1, -1, -1)) + crossmap.coding_to_coordinate, #(-1, -1, -1, -1) + {'position': 1, 'offset': 0, 'region': 'u'}) invariant( crossmap.coordinate_to_coding, 10, - crossmap.coding_to_coordinate, (-1, 0, -1, 0)) + crossmap.coding_to_coordinate, #(-1, 0, -1, 0)) + {'position': 1, 'offset': 0, 'region': '-'} + ) invariant( crossmap.coordinate_to_coding, 11, - crossmap.coding_to_coordinate, (1, 0, 0, 0)) + crossmap.coding_to_coordinate, #(1, 0, 0, 0)) + {'position': 1, 'offset': 0, 'region': ''}) def test_Coding_small_utr5_inverted(): @@ -247,13 +311,16 @@ def test_Coding_small_utr5_inverted(): # Transition from upstream to 5' UTR to CDS. invariant( crossmap.coordinate_to_coding, 20, - crossmap.coding_to_coordinate, (-1, -1, -1, -1)) + crossmap.coding_to_coordinate,# (-1, -1, -1, -1) + {'position': 1, 'offset': 0, 'region': 'u'}) invariant( crossmap.coordinate_to_coding, 19, - crossmap.coding_to_coordinate, (-1, 0, -1, 0)) + crossmap.coding_to_coordinate, #(-1, 0, -1, 0)) + {'position': 1, 'offset': 0, 'region': '-'}) invariant( crossmap.coordinate_to_coding, 18, - crossmap.coding_to_coordinate, (1, 0, 0, 0)) + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': ''}) def test_Coding_small_utr3(): @@ -263,13 +330,16 @@ def test_Coding_small_utr3(): # Transition from CDS to 3' UTR to downstream. 
invariant( crossmap.coordinate_to_coding, 18, - crossmap.coding_to_coordinate, (4, 0, 0, 0)) + crossmap.coding_to_coordinate, + {'position': 4, 'offset': 0, 'region': ''}) invariant( crossmap.coordinate_to_coding, 19, - crossmap.coding_to_coordinate, (1, 0, 1, 0)) + crossmap.coding_to_coordinate, #(1, 0, 1, 0) + {'position': 1, 'offset': 0, 'region': '*'}) invariant( crossmap.coordinate_to_coding, 20, - crossmap.coding_to_coordinate, (1, 1, 1, 1)) + crossmap.coding_to_coordinate, #(1, 1, 1, 1)) + {'position': 1, 'offset': 0, 'region': 'd'}) def test_Coding_small_utr3_inverted(): @@ -279,13 +349,16 @@ def test_Coding_small_utr3_inverted(): # Transition from CDS to 3' UTR to downstream. invariant( crossmap.coordinate_to_coding, 11, - crossmap.coding_to_coordinate, (4, 0, 0, 0)) + crossmap.coding_to_coordinate, + {'position': 4, 'offset': 0, 'region': ''}) invariant( crossmap.coordinate_to_coding, 10, - crossmap.coding_to_coordinate, (1, 0, 1, 0)) + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': '*'}) invariant( crossmap.coordinate_to_coding, 9, - crossmap.coding_to_coordinate, (1, 1, 1, 1)) + crossmap.coding_to_coordinate, + {'position': 1, 'offset': 0, 'region': 'd'}) def test_Coding_degenerate(): From 043330389c052dc30b399142e2b17b7ec6c1df18 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Thu, 26 Feb 2026 18:29:26 +0100 Subject: [PATCH 013/127] Refactor(multi_locus): degenerate positions for multi_locus and tests --- mutalyzer_crossmapper/multi_locus.py | 13 +++++++------ tests/test_multi_locus.py | 13 ++++++++----- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/mutalyzer_crossmapper/multi_locus.py b/mutalyzer_crossmapper/multi_locus.py index 1e3f72a..d7a52e9 100644 --- a/mutalyzer_crossmapper/multi_locus.py +++ b/mutalyzer_crossmapper/multi_locus.py @@ -94,7 +94,8 @@ def to_coordinate(self, position_m:dict): :returns int: Coordinate. 
""" - if position_m["region"] == "": + region = position_m["region"] + if region == "": index = min( len(self._offsets), max(0, bisect_right(self._offsets, position_m["position"]) - 1) @@ -102,12 +103,12 @@ def to_coordinate(self, position_m:dict): position_m["position"] = position_m["position"] - self._offsets[index] return self._loci[self._direction(index)].to_coordinate(position_m) - elif position_m["region"] == "u": + elif region == "u": if self._inverted: - return position_m["position"] + self._locations[-1][1] - 1 - return self._locations[0][0] - position_m["position"] + return abs(position_m["position"]) + self._locations[-1][1] + position_m["offset"] - 1 + return self._locations[0][0] - abs(position_m["position"]) + position_m["offset"] else: # d if self._inverted: - return self._locations[0][0] - position_m["position"] - return position_m["position"] + self._locations[-1][1] - 1 + return self._locations[0][0] - abs(position_m["position"]) + position_m["offset"] + return abs(position_m["position"]) + self._locations[-1][1] + position_m["offset"] - 1 diff --git a/tests/test_multi_locus.py b/tests/test_multi_locus.py index dcd18ef..922529a 100644 --- a/tests/test_multi_locus.py +++ b/tests/test_multi_locus.py @@ -158,6 +158,7 @@ def test_MultiLocus_degenerate(): 4, [ {"position": 0, "offset": -1, "region": "u"}, + {"position": 1, "offset": 0, "region": "u"}, {"position": -1, "offset": 0, "region": "u"}, ], ) @@ -166,8 +167,8 @@ def test_MultiLocus_degenerate(): multi_locus.to_coordinate, 72, [ - {"position": 21, "offset": 1, "region": "d"}, - {"position": 22, "offset": 0, "region": "d"}, + {"position": 0, "offset": 1, "region": "d"}, + {"position": 1, "offset": 0, "region": "d"}, ], ) @@ -180,8 +181,9 @@ def test_MultiLocus_inverted_degenerate(): multi_locus.to_coordinate, 72, [ - {"position": 0, "offset": -1, "region": "u"}, + {"position": 0, "offset": 1, "region": "u"}, {"position": -1, "offset": 0, "region": "u"}, + {"position": 1, "offset": 0, "region": 
"u"}, ], ) @@ -189,7 +191,8 @@ def test_MultiLocus_inverted_degenerate(): multi_locus.to_coordinate, 4, [ - {"position": 21, "offset": 1, "region": "d"}, - {"position": 22, "offset": 0, "region": "d"}, + {"position": 0, "offset": -1, "region": "d"}, + {"position": 1, "offset": 0, "region": "d"}, + {"position": 2, "offset": 1, "region": "d"} ], ) From a583f00fcc038753243c046d9462169996626f7b Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 27 Feb 2026 11:44:31 +0100 Subject: [PATCH 014/127] Refactor(crossmapper): implement hgvs position model for protein and write degenerate in the same flow. --- mutalyzer_crossmapper/crossmapper.py | 58 ++++++++++++++++++++-------- tests/test_crossmapper.py | 36 +++++++++++------ 2 files changed, 67 insertions(+), 27 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 8efa979..af2b3f4 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -119,14 +119,30 @@ def coordinate_to_coding(self, coordinate, degenerate=False): :returns tuple: Coding position (c./r.). """ pos = self._coordinate_to_coding(coordinate) - - if degenerate and pos[3]: - if pos[2] == 0: - if pos[0] == 1 and pos[1] < 0: - return pos[1], 0, -1, pos[3] - if pos[0] == self._cds_len and pos[1] > 0: - return pos[0] + pos[1] - self._cds_len, 0, 1, pos[3] - return pos[0] + pos[1], 0, pos[2], pos[3] + # degenerate option: allow multiple or less correct ways to describe one position, + # e.g., neucleo c.10 can be the same location as c.d1 (if CDs ends at c9) + # the previous version corrects location+offset to location in UTR area (c.1-2->c.-2) + # or merge the offset to location + + if degenerate and pos["region"] in ["u", "d"]: + # if pos["region"] == "": unlikely to happen in biology? maybe used to collapse HGVS location at CDs boundary? 
+ if pos["position"] == 1 and pos["offset"] < 0: + return { + "position":pos["offset"], + "offset": 0, + "region": "-" + } + if pos["position"] == self._cds_len and pos["offset"] > 0: + return { + "position": pos["position"] + pos["offset"] - self._cds_len, + "offset": 0, + "region": "*" + } + return { + "position": pos["position"] + pos["offset"], + "offset": 0, + "region":pos["region"] + } return pos @@ -140,13 +156,13 @@ def coding_to_coordinate(self, pos_m): region = pos_m["region"] if region == "u": noncoding_pos = { - "position": pos_m["position"], + "position": abs(pos_m["position"]) + pos_m["offset"], "offset": 0, "region": "u" } elif region == "d": noncoding_pos = { - "position": pos_m["position"], + "position": abs(pos_m["position"]) + pos_m["offset"], "offset": 0, "region": "d" } @@ -180,9 +196,15 @@ def coordinate_to_protein(self, coordinate): """ pos = self.coordinate_to_coding(coordinate) - if pos[2] == -1: # before CDs - return (pos[0] // 3, pos[0] % 3 + 1, *pos[1:]) - return ((pos[0] + 2) // 3, (pos[0] + 2) % 3 + 1, *pos[1:]) + if pos["region"] in ["-", "*"]: + return { + "position": pos["position"] // 3 + 1, + "position_in_codon": pos["position"] % 3, + **{k: v for k, v in pos.items() if k != "position"}} + return { + "position": (pos["position"]+2) // 3, + "position_in_codon": (pos["position"]+2) % 3 + 1, + **{k: v for k, v in pos.items() if k != "position"}} def protein_to_coordinate(self, position): """Convert a protein position (p.) to a coordinate. @@ -191,9 +213,13 @@ def protein_to_coordinate(self, position): :returns int: Coordinate. 
""" - if position[3] == -1: + if position["region"] in ["-", "*"]: return self.coding_to_coordinate( - (3 * position[0] + position[1] - 1, *position[2:])) + {"position": 3 * position["position"] + position["position_in_codon"] - 3, + "offset": position["offset"], + "region": position["region"]}) return self.coding_to_coordinate( - (3 * position[0] + position[1] - 3, *position[2:])) + {"position": 3 * position["position"] + position["position_in_codon"] - 3, + "offset": position["offset"], + "region": position["region"]}) diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index e2080ac..e39225f 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -70,7 +70,10 @@ def test_NonCoding_degenerate(): # Boundary between downstream and transcript. degenerate_equal( crossmap.noncoding_to_coordinate, 72, - [{"position": 1, "offset": 0, "region": "d"}]) + [ + {"position": 1, "offset": 0, "region": "d"}, + {"position": 0, "offset": 1, "region": "d"} + ]) def test_NonCoding_inverted_degenerate(): @@ -87,7 +90,8 @@ def test_NonCoding_inverted_degenerate(): crossmap.noncoding_to_coordinate, 4, [{"position": 1 , "offset": 0, "region": "d"}]) - +_exons = [(5, 8), (14, 20), (30, 35), (40, 44), (50, 52), (70, 72)] +_cds = (32, 43) def test_Coding(): """Forward oriented coding transcript.""" crossmap = Coding(_exons, _cds) @@ -231,11 +235,11 @@ def test_Coding_no_utr5(): invariant( crossmap.coordinate_to_coding, 9, crossmap.coding_to_coordinate, #(1, -1, 0, -1) - {'position': 1, 'offset': 0, 'region': 'u'}) + {'position': 1, 'offset': 0, 'region': 'u'}) # serialize result : u1 invariant( crossmap.coordinate_to_coding, 10, crossmap.coding_to_coordinate, - {'position': 1, 'offset': 0, 'region': ''}) + {'position': 1, 'offset': 0, 'region': ''}) # serialize result: 1 def test_Coding_no_utr5_inverted(): @@ -367,7 +371,11 @@ def test_Coding_degenerate(): degenerate_equal( crossmap.coding_to_coordinate, 9, - [(-1, -1, -1, -1), (-2, 0, -1, -1), (1, -2, 0, 
-1), (1, -10, 1, -1)]) + [ + {'position': 1, 'offset': 8, 'region': 'u'}, + {'position': 8, 'offset': 1, 'region': 'u'}, + {'position': -1, 'offset': 10, 'region': 'u'} + ]) degenerate_equal( crossmap.coding_to_coordinate, 20, [(1, 1, 1, 1), (2, 0, 1, 1), (8, 2, 0, 1), (-1, 10, -1, 1)]) @@ -466,23 +474,29 @@ def test_Coding_protein(): # Boundary between 5' UTR and CDS. invariant( crossmap.coordinate_to_protein, 31, - crossmap.protein_to_coordinate, (-1, 3, 0, -1, 0)) + crossmap.protein_to_coordinate, + {'position': 1, "position_in_codon": 1, 'offset': 0, 'region': '-'}) invariant( crossmap.coordinate_to_protein, 32, - crossmap.protein_to_coordinate, (1, 1, 0, 0, 0)) + crossmap.protein_to_coordinate, + {'position': 1, "position_in_codon": 1, 'offset': 0, 'region': ''}) # Intron boundary. invariant( crossmap.coordinate_to_protein, 34, - crossmap.protein_to_coordinate, (1, 3, 0, 0, 0)) + crossmap.protein_to_coordinate, + {'position': 1, "position_in_codon": 3, 'offset': 0, 'region': ''}) invariant( crossmap.coordinate_to_protein, 35, - crossmap.protein_to_coordinate, (1, 3, 1, 0, 0)) + crossmap.protein_to_coordinate, + {'position': 1, "position_in_codon": 3, 'offset': 1, 'region': ''}) # Boundary between CDS and 3' UTR. 
invariant( crossmap.coordinate_to_protein, 42, - crossmap.protein_to_coordinate, (2, 3, 0, 0, 0)) + crossmap.protein_to_coordinate, + {'position': 2, "position_in_codon": 3, 'offset': 0, 'region': ''}) invariant( crossmap.coordinate_to_protein, 43, - crossmap.protein_to_coordinate, (1, 1, 0, 1, 0)) + crossmap.protein_to_coordinate, + {'position': 1, "position_in_codon": 1, 'offset': 0, 'region': '*'}) From a57a3e6177ccb29125f3451f6b34a6b16c24d34c Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Thu, 5 Mar 2026 11:54:02 +0100 Subject: [PATCH 015/127] Refactor(crossmapper): discard degenerate option --- mutalyzer_crossmapper/crossmapper.py | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index af2b3f4..be0d1e1 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -110,7 +110,7 @@ def _coordinate_to_coding(self, coordinate): else: return noncoding_pos - def coordinate_to_coding(self, coordinate, degenerate=False): + def coordinate_to_coding(self, coordinate): """Convert a coordinate to a coding position (c./r.). :arg int coordinate: Coordinate. @@ -119,31 +119,6 @@ def coordinate_to_coding(self, coordinate, degenerate=False): :returns tuple: Coding position (c./r.). """ pos = self._coordinate_to_coding(coordinate) - # degenerate option: allow multiple or less correct ways to describe one position, - # e.g., neucleo c.10 can be the same location as c.d1 (if CDs ends at c9) - # the previous version corrects location+offset to location in UTR area (c.1-2->c.-2) - # or merge the offset to location - - if degenerate and pos["region"] in ["u", "d"]: - # if pos["region"] == "": unlikely to happen in biology? maybe used to collapse HGVS location at CDs boundary? 
- if pos["position"] == 1 and pos["offset"] < 0: - return { - "position":pos["offset"], - "offset": 0, - "region": "-" - } - if pos["position"] == self._cds_len and pos["offset"] > 0: - return { - "position": pos["position"] + pos["offset"] - self._cds_len, - "offset": 0, - "region": "*" - } - return { - "position": pos["position"] + pos["offset"], - "offset": 0, - "region":pos["region"] - } - return pos def coding_to_coordinate(self, pos_m): From d245cd6ed7749ba25838409ff2090e392ce2cdab Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Thu, 5 Mar 2026 14:37:40 +0100 Subject: [PATCH 016/127] Change examples in README. --- README.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.rst b/README.rst index 7862443..061f91b 100644 --- a/README.rst +++ b/README.rst @@ -53,8 +53,8 @@ positions and coordinates. >>> from mutalyzer_crossmapper import Genomic >>> crossmap = Genomic() >>> crossmap.coordinate_to_genomic(0) - 1 - >>> crossmap.genomic_to_coordinate(1) + {"position": 1} + >>> crossmap.genomic_to_coordinate({"position": 1}) 0 On top of the functionality provided by the ``Genomic`` class, the @@ -67,8 +67,8 @@ positions and coordinates. >>> exons = [(5, 8), (14, 20), (30, 35), (40, 44), (50, 52), (70, 72)] >>> crossmap = NonCoding(exons) >>> crossmap.coordinate_to_noncoding(35) - (14, 1, 0) - >>> crossmap.noncoding_to_coordinate((14, 1)) + {"position": 14, "offset": 1, "region": ""} + >>> crossmap.noncoding_to_coordinate({"position": 14, "offset": 1, "region": ""}) 35 Add the flag ``inverted=True`` to the constructor when the transcript resides @@ -84,8 +84,8 @@ coordinates as well as conversions between protein positions and coordinates. 
>>> cds = (32, 43) >>> crossmap = Coding(exons, cds) >>> crossmap.coordinate_to_coding(31) - (-1, 0, -1, 0) - >>> crossmap.coding_to_coordinate((-1, 0, -1)) + {"position": -1, "offset": 0, "region": "-"} + >>> crossmap.coding_to_coordinate({"position": -1, "offset": 0, "region": "-"}) 31 Again, the flag ``inverted=True`` can be used for transcripts that reside on @@ -96,8 +96,8 @@ Conversions between protein positions and coordinates are done as follows. .. code:: python >>> crossmap.coordinate_to_protein(41) - (2, 2, 0, 0, 0) - >>> crossmap.protein_to_coordinate((2, 2, 0, 0)) + {"position": 2, "position_in_codon": 2, "offset": 1, "region": ""} + >>> crossmap.protein_to_coordinate({"position": 2, "position_in_codon": 2, "offset": 1, "region": ""}) 41 From 89559e4f9e5c8ac142cc7683eada5616938bae58 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 6 Mar 2026 10:54:50 +0100 Subject: [PATCH 017/127] Discard dataclass object for hgvs position model --- mutalyzer_crossmapper/hgvs_position_model.py | 92 -------------------- 1 file changed, 92 deletions(-) delete mode 100644 mutalyzer_crossmapper/hgvs_position_model.py diff --git a/mutalyzer_crossmapper/hgvs_position_model.py b/mutalyzer_crossmapper/hgvs_position_model.py deleted file mode 100644 index 7992c63..0000000 --- a/mutalyzer_crossmapper/hgvs_position_model.py +++ /dev/null @@ -1,92 +0,0 @@ -""" -HGVS Position Model - - a dataclass object to bridge HGVS position component and Crossmapper outputs. -""" -from dataclasses import dataclass -from typing import Optional, Tuple - -@dataclass -class HGVSPositionModel: - """ - Represent the position component of an HGVS variant description. 
- This model captures details necessary to describe the '[position]' part in an HGVS - description of the form - [reference sequence]:[sequence type].[position][variant type][change] - """ - position: int - offset: Optional[int] = None - region: Optional[str] = None - position_in_codon: Optional[int] = None - - - def __post_init__(self): - # validate position - if self.position <= 0: - raise ValueError("Position must be a positive integer.") - - # validate region - region_values = {"u", "-", "", "*", "d"} - if self.region is not None and self.region not in region_values: - raise ValueError( - f"Invalid region value: {self.region}. Allowed values are: {region_values}" - ) - - # validate position_in_codon - codon_values = {1, 2, 3} - if self.position_in_codon is not None and self.position_in_codon not in codon_values: - raise ValueError( - f"Invalid position in codon value: {self.position_in_codon}. " - f"Allowed values are: {codon_values}" - ) - - - # Convert from tuple to HGVSPositionModel - - #TODO: check for inverted and degerate options, now only support non-inverted and non-degenerate cases - @classmethod - def to_hgvs_position_model(cls, raw_tuple:Tuple): - """Convert crossmapper tuple to an HGVSPositionModel instance.""" - if not raw_tuple: - raise ValueError("Input tuple position cannot be empty.") - - # Genomic - if len(raw_tuple) == 1: - return cls(position=raw_tuple[0]) - # Non-coding - if len(raw_tuple) == 3: - pass - - # Coding - #(c_pos, offset, in_cds, offset_to_exon_boundary) - if len(raw_tuple) == 4: - c_pos, offset, cds, dis_to_exon_boundary = raw_tuple - region = cls._determine_region(cds, dis_to_exon_boundary) - return cls(position=c_pos, offset=offset, region=region) - - # Protein ( - if len(raw_tuple) == 5: - p_pos, codon_pos, offset, cds, dis_to_exon_boundary = raw_tuple - if cds == 0: # in CDS - return cls( - position=p_pos, - region="", - position_in_codon=codon_pos - ) - else: - # TODO: shall we support HGVSPositionModel outside of CDS for 
protein? - pass - - - @staticmethod - def _determine_region(cds, dis_to_exon_boundary): - if dis_to_exon_boundary < 0: - return "u" - elif dis_to_exon_boundary > 0: - return "d" - else: # in translation range, check if in CDS or not - if cds < 0: # before CDS - return "-" - elif cds > 0: # after CDS - return "*" - else: - return "" \ No newline at end of file From 08739ffbad75a76deb56ce4cc1930ad264e7cbf1 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 6 Mar 2026 16:21:40 +0100 Subject: [PATCH 018/127] Refactor(crossmapper): allow degenerate option for 'u' and 'd' area --- mutalyzer_crossmapper/crossmapper.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index be0d1e1..9056ade 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -110,7 +110,7 @@ def _coordinate_to_coding(self, coordinate): else: return noncoding_pos - def coordinate_to_coding(self, coordinate): + def coordinate_to_coding(self, coordinate, degenerate=False): """Convert a coordinate to a coding position (c./r.). :arg int coordinate: Coordinate. @@ -119,6 +119,13 @@ def coordinate_to_coding(self, coordinate): :returns tuple: Coding position (c./r.). 
""" pos = self._coordinate_to_coding(coordinate) + if degenerate and pos["region"] in ["u", "d"]: + if pos["region"] == "u": + pos["position"] = pos["position"] + self._coding[0] + pos["region"] = "-" + else: + pos["position"] = pos["position"] + self._coding[1] + pos["region"] = "*" return pos def coding_to_coordinate(self, pos_m): From fecf02617cdbb2a5d7c8c22ef216137d4abde455 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Mon, 9 Mar 2026 11:15:12 +0100 Subject: [PATCH 019/127] Cleanup(locus) --- mutalyzer_crossmapper/locus.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mutalyzer_crossmapper/locus.py b/mutalyzer_crossmapper/locus.py index a531ecb..738e87c 100644 --- a/mutalyzer_crossmapper/locus.py +++ b/mutalyzer_crossmapper/locus.py @@ -24,13 +24,13 @@ def to_position(self, coordinate): return {"position": self._end, "offset": self.boundary[0] - coordinate} return {"position": self.boundary[1] - coordinate, "offset": 0} - if coordinate < self.boundary[0]: # upstream of an exon + if coordinate < self.boundary[0]: return {"position": 0, "offset": coordinate - self.boundary[0]} - if coordinate > self.boundary[1]: # downstream of an exon + if coordinate > self.boundary[1]: return {"position": self._end, "offset": coordinate - self.boundary[1]} return {"position": coordinate - self.boundary[0], "offset": 0} - def to_coordinate(self, position_m): + def to_coordinate(self, pos_m): """Convert a position model to a coordinate. :arg dict position: Position model with 'position' and 'offset' keys. @@ -38,5 +38,5 @@ def to_coordinate(self, position_m): :returns int: Coordinate. 
""" if self._inverted: - return self.boundary[1] - position_m["position"] - position_m["offset"] - return self.boundary[0] + position_m["position"] + position_m["offset"] + return self.boundary[1] - pos_m["position"] - pos_m["offset"] + return self.boundary[0] + pos_m["position"] + pos_m["offset"] From 54746ec68c6304e324839987cc392a5554c97392 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Mon, 9 Mar 2026 11:37:55 +0100 Subject: [PATCH 020/127] Cleanup(multi_locus) --- mutalyzer_crossmapper/multi_locus.py | 37 +++++++++++++++------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/mutalyzer_crossmapper/multi_locus.py b/mutalyzer_crossmapper/multi_locus.py index d7a52e9..b7b6ce4 100644 --- a/mutalyzer_crossmapper/multi_locus.py +++ b/mutalyzer_crossmapper/multi_locus.py @@ -54,61 +54,64 @@ def to_position(self, coordinate:int): :arg int coordinate: Coordinate. - :returns dict: Position model. + :returns dict: Position model 'position', 'offset' and 'region' keys. """ index = nearest_location(self._locations, coordinate, self._inverted) outside = self._orientation * self.outside(coordinate) region = "u" if outside < 0 else "d" if outside > 0 else "" location = self._loci[index].to_position(coordinate) - # UTR + if outside: return { "position": abs(location["offset"]), "offset": 0, "region": region } - # in exons - if location["offset"] == 0: # in an exon + + if location["offset"] == 0: return { "position": location["position"] + self._offsets[self._direction(index)], "offset": 0, "region": "" } - elif location["offset"] < 0: # before an exon + + elif location["offset"] < 0: return { "position": self._offsets[self._direction(index)], "offset": location["offset"], "region": "" } - else: # after an exon + + else: return{ "position": location["position"] + self._offsets[self._direction(index)], "offset": location["offset"], "region": "" } - def to_coordinate(self, position_m:dict): + def to_coordinate(self, pos_m:dict): """Convert a position model to 
a coordinate. - :arg dict position: Position. + :arg dict position: Position model with 'position','offset' and 'region' keys. :returns int: Coordinate. """ - region = position_m["region"] + region = pos_m["region"] + if region == "": index = min( len(self._offsets), - max(0, bisect_right(self._offsets, position_m["position"]) - 1) + max(0, bisect_right(self._offsets, pos_m["position"]) - 1) ) - position_m["position"] = position_m["position"] - self._offsets[index] - return self._loci[self._direction(index)].to_coordinate(position_m) + pos_m["position"] = pos_m["position"] - self._offsets[index] + return self._loci[self._direction(index)].to_coordinate(pos_m) elif region == "u": if self._inverted: - return abs(position_m["position"]) + self._locations[-1][1] + position_m["offset"] - 1 - return self._locations[0][0] - abs(position_m["position"]) + position_m["offset"] + return abs(pos_m["position"]) + self._locations[-1][1] + pos_m["offset"] - 1 + return self._locations[0][0] - abs(pos_m["position"]) + pos_m["offset"] - else: # d + else: if self._inverted: - return self._locations[0][0] - abs(position_m["position"]) + position_m["offset"] - return abs(position_m["position"]) + self._locations[-1][1] + position_m["offset"] - 1 + return self._locations[0][0] - abs(pos_m["position"]) + pos_m["offset"] + return abs(pos_m["position"]) + self._locations[-1][1] + pos_m["offset"] - 1 From 5ba6a5afdd7bd5395006dd70dc34122f1706aef3 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Mon, 9 Mar 2026 11:45:33 +0100 Subject: [PATCH 021/127] Cleanup(multi_locus) --- mutalyzer_crossmapper/multi_locus.py | 32 +++++++++++++--------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/mutalyzer_crossmapper/multi_locus.py b/mutalyzer_crossmapper/multi_locus.py index b7b6ce4..6a71cc4 100644 --- a/mutalyzer_crossmapper/multi_locus.py +++ b/mutalyzer_crossmapper/multi_locus.py @@ -75,19 +75,18 @@ def to_position(self, coordinate:int): "region": "" } - elif location["offset"] 
< 0: + if location["offset"] < 0: return { "position": self._offsets[self._direction(index)], "offset": location["offset"], "region": "" } - else: - return{ - "position": location["position"] + self._offsets[self._direction(index)], - "offset": location["offset"], - "region": "" - } + return{ + "position": location["position"] + self._offsets[self._direction(index)], + "offset": location["offset"], + "region": "" + } def to_coordinate(self, pos_m:dict): """Convert a position model to a coordinate. @@ -98,20 +97,19 @@ def to_coordinate(self, pos_m:dict): """ region = pos_m["region"] - if region == "": - index = min( - len(self._offsets), - max(0, bisect_right(self._offsets, pos_m["position"]) - 1) - ) - pos_m["position"] = pos_m["position"] - self._offsets[index] - return self._loci[self._direction(index)].to_coordinate(pos_m) - - elif region == "u": + if region == "u": if self._inverted: return abs(pos_m["position"]) + self._locations[-1][1] + pos_m["offset"] - 1 return self._locations[0][0] - abs(pos_m["position"]) + pos_m["offset"] - else: + if region == "d": if self._inverted: return self._locations[0][0] - abs(pos_m["position"]) + pos_m["offset"] return abs(pos_m["position"]) + self._locations[-1][1] + pos_m["offset"] - 1 + + index = min( + len(self._offsets), + max(0, bisect_right(self._offsets, pos_m["position"]) - 1) + ) + pos_m["position"] = pos_m["position"] - self._offsets[index] + return self._loci[self._direction(index)].to_coordinate(pos_m) From 7f32afcc2cff00fdcbc7ba92d242548f1027b7f2 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Mon, 9 Mar 2026 11:50:43 +0100 Subject: [PATCH 022/127] Format(multi locus test) --- tests/test_multi_locus.py | 194 +++++++++++++++++++++++++++++++------- 1 file changed, 161 insertions(+), 33 deletions(-) diff --git a/tests/test_multi_locus.py b/tests/test_multi_locus.py index 922529a..c08ff7f 100644 --- a/tests/test_multi_locus.py +++ b/tests/test_multi_locus.py @@ -15,6 +15,7 @@ def test_offsets_inverted(): 
"""Cummulative location lengths for inverted list of locations.""" assert _offsets(_locations, -1) == [0, 2, 4, 8, 13, 19] + def test_offsets_adjacent(): """Cummulative location lengths for adjacent locations.""" assert _offsets([(1, 3), (3, 5)], 1) == [0, 2] @@ -30,32 +31,71 @@ def test_MultiLocus(): multi_locus = MultiLocus(_locations) # Boundary between upstream and the first locus. - invariant(multi_locus.to_position, 4, multi_locus.to_coordinate, {"position": 1, "offset": 0, "region": "u"}, + invariant( + multi_locus.to_position, + 4, + multi_locus.to_coordinate, + {"position": 1, "offset": 0, "region": "u"}, ) invariant( - multi_locus.to_position, 5, multi_locus.to_coordinate, {"position": 0, "offset": 0, "region": ""}, + multi_locus.to_position, + 5, + multi_locus.to_coordinate, + {"position": 0, "offset": 0, "region": ""}, ) # Internal locus. invariant( - multi_locus.to_position, 29, multi_locus.to_coordinate, {"position": 9, "offset": -1, "region": ""}) + multi_locus.to_position, + 29, + multi_locus.to_coordinate, + {"position": 9, "offset": -1, "region": ""}, + ) invariant( - multi_locus.to_position, 30, multi_locus.to_coordinate, {"position": 9, "offset": 0, "region": ""}) + multi_locus.to_position, + 30, + multi_locus.to_coordinate, + {"position": 9, "offset": 0, "region": ""}, + ) invariant( - multi_locus.to_position, 31, multi_locus.to_coordinate, {"position": 10, "offset": 0, "region": ""}) + multi_locus.to_position, + 31, + multi_locus.to_coordinate, + {"position": 10, "offset": 0, "region": ""}, + ) invariant( - multi_locus.to_position, 33, multi_locus.to_coordinate, {"position": 12, "offset": 0, "region": ""}) + multi_locus.to_position, + 33, + multi_locus.to_coordinate, + {"position": 12, "offset": 0, "region": ""}, + ) invariant( - multi_locus.to_position, 34, multi_locus.to_coordinate, {"position": 13, "offset": 0, "region": ""}) + multi_locus.to_position, + 34, + multi_locus.to_coordinate, + {"position": 13, "offset": 0, "region": ""}, + ) 
invariant( - multi_locus.to_position, 35, multi_locus.to_coordinate, {"position": 13, "offset": 1, "region": ""}) + multi_locus.to_position, + 35, + multi_locus.to_coordinate, + {"position": 13, "offset": 1, "region": ""}, + ) # Boundary between the last locus and downstream. invariant( - multi_locus.to_position, 71, multi_locus.to_coordinate, {"position": 21, "offset": 0, "region": ""}) + multi_locus.to_position, + 71, + multi_locus.to_coordinate, + {"position": 21, "offset": 0, "region": ""}, + ) invariant( - multi_locus.to_position, 72, multi_locus.to_coordinate, {"position": 1, "offset": 0, "region": "d"}) + multi_locus.to_position, + 72, + multi_locus.to_coordinate, + {"position": 1, "offset": 0, "region": "d"}, + ) def test_MultiLocus_inverted(): @@ -64,29 +104,69 @@ def test_MultiLocus_inverted(): # Boundary between upstream and the first locus. invariant( - multi_locus.to_position, 72, multi_locus.to_coordinate, {"position": 1, "offset": 0, "region": "u"}) + multi_locus.to_position, + 72, + multi_locus.to_coordinate, + {"position": 1, "offset": 0, "region": "u"}, + ) invariant( - multi_locus.to_position, 71, multi_locus.to_coordinate, {"position": 0, "offset": 0, "region": ""}) + multi_locus.to_position, + 71, + multi_locus.to_coordinate, + {"position": 0, "offset": 0, "region": ""}, + ) # Internal locus. 
invariant( - multi_locus.to_position, 35, multi_locus.to_coordinate, {"position": 8, "offset": -1, "region": ""}) + multi_locus.to_position, + 35, + multi_locus.to_coordinate, + {"position": 8, "offset": -1, "region": ""}, + ) invariant( - multi_locus.to_position, 34, multi_locus.to_coordinate, {"position": 8, "offset": 0, "region": ""}) + multi_locus.to_position, + 34, + multi_locus.to_coordinate, + {"position": 8, "offset": 0, "region": ""}, + ) invariant( - multi_locus.to_position, 33, multi_locus.to_coordinate, {"position": 9, "offset": 0, "region": ""}) + multi_locus.to_position, + 33, + multi_locus.to_coordinate, + {"position": 9, "offset": 0, "region": ""}, + ) invariant( - multi_locus.to_position, 31, multi_locus.to_coordinate, {"position": 11, "offset": 0, "region": ""}) + multi_locus.to_position, + 31, + multi_locus.to_coordinate, + {"position": 11, "offset": 0, "region": ""}, + ) invariant( - multi_locus.to_position, 30, multi_locus.to_coordinate, {"position": 12, "offset": 0, "region": ""}) + multi_locus.to_position, + 30, + multi_locus.to_coordinate, + {"position": 12, "offset": 0, "region": ""}, + ) invariant( - multi_locus.to_position, 29, multi_locus.to_coordinate, {"position": 12, "offset": 1, "region": ""}) + multi_locus.to_position, + 29, + multi_locus.to_coordinate, + {"position": 12, "offset": 1, "region": ""}, + ) # Boundary between the last locus and downstream. 
invariant( - multi_locus.to_position, 5, multi_locus.to_coordinate, {"position": 21, "offset": 0, "region": ""}) + multi_locus.to_position, + 5, + multi_locus.to_coordinate, + {"position": 21, "offset": 0, "region": ""}, + ) invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, {"position": 1, "offset": 0, "region": "d"}) + multi_locus.to_position, + 4, + multi_locus.to_coordinate, + {"position": 1, "offset": 0, "region": "d"}, + ) def test_MultiLocus_adjacent_loci(): @@ -94,9 +174,17 @@ def test_MultiLocus_adjacent_loci(): multi_locus = MultiLocus([(1, 3), (3, 5)]) invariant( - multi_locus.to_position, 2, multi_locus.to_coordinate, {"position": 1, "offset": 0, "region": ""}) + multi_locus.to_position, + 2, + multi_locus.to_coordinate, + {"position": 1, "offset": 0, "region": ""}, + ) invariant( - multi_locus.to_position, 3, multi_locus.to_coordinate, {"position": 2, "offset": 0, "region": ""}) + multi_locus.to_position, + 3, + multi_locus.to_coordinate, + {"position": 2, "offset": 0, "region": ""}, + ) def test_MultiLocus_adjacent_loci_inverted(): @@ -104,9 +192,17 @@ def test_MultiLocus_adjacent_loci_inverted(): multi_locus = MultiLocus([(1, 3), (3, 5)], True) invariant( - multi_locus.to_position, 3, multi_locus.to_coordinate, {"position": 1, "offset": 0, "region": ""}) + multi_locus.to_position, + 3, + multi_locus.to_coordinate, + {"position": 1, "offset": 0, "region": ""}, + ) invariant( - multi_locus.to_position, 2, multi_locus.to_coordinate, {"position": 2, "offset": 0, "region": ""}) + multi_locus.to_position, + 2, + multi_locus.to_coordinate, + {"position": 2, "offset": 0, "region": ""}, + ) def test_MultiLocus_offsets_odd(): @@ -114,9 +210,17 @@ def test_MultiLocus_offsets_odd(): multi_locus = MultiLocus([(1, 3), (6, 8)]) invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, {"position": 1, "offset": 2, "region": ""}) + multi_locus.to_position, + 4, + multi_locus.to_coordinate, + {"position": 1, "offset": 2, "region": ""}, + ) 
invariant( - multi_locus.to_position, 5, multi_locus.to_coordinate, {"position": 2, "offset": -1, "region": ""}) + multi_locus.to_position, + 5, + multi_locus.to_coordinate, + {"position": 2, "offset": -1, "region": ""}, + ) def test_MultiLocus_offsets_odd_inverted(): @@ -124,9 +228,17 @@ def test_MultiLocus_offsets_odd_inverted(): multi_locus = MultiLocus([(1, 3), (6, 8)], True) invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, {"position": 1, "offset": 2, "region": ""}) + multi_locus.to_position, + 4, + multi_locus.to_coordinate, + {"position": 1, "offset": 2, "region": ""}, + ) invariant( - multi_locus.to_position, 3, multi_locus.to_coordinate, {"position": 2, "offset": -1, "region": ""}) + multi_locus.to_position, + 3, + multi_locus.to_coordinate, + {"position": 2, "offset": -1, "region": ""}, + ) def test_MultiLocus_offsets_even(): @@ -134,9 +246,17 @@ def test_MultiLocus_offsets_even(): multi_locus = MultiLocus([(1, 3), (7, 9)]) invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, {"position": 1, "offset": 2, "region": ""}) + multi_locus.to_position, + 4, + multi_locus.to_coordinate, + {"position": 1, "offset": 2, "region": ""}, + ) invariant( - multi_locus.to_position, 5, multi_locus.to_coordinate, {"position": 2, "offset": -2, "region": ""}) + multi_locus.to_position, + 5, + multi_locus.to_coordinate, + {"position": 2, "offset": -2, "region": ""}, + ) def test_MultiLocus_offsets_even_inverted(): @@ -144,9 +264,17 @@ def test_MultiLocus_offsets_even_inverted(): multi_locus = MultiLocus([(1, 3), (7, 9)], True) invariant( - multi_locus.to_position, 5, multi_locus.to_coordinate, {"position": 1, "offset": 2, "region": ""}) + multi_locus.to_position, + 5, + multi_locus.to_coordinate, + {"position": 1, "offset": 2, "region": ""}, + ) invariant( - multi_locus.to_position, 4, multi_locus.to_coordinate, {"position": 2, "offset": -2, "region": ""}) + multi_locus.to_position, + 4, + multi_locus.to_coordinate, + {"position": 2, "offset": 
-2, "region": ""}, + ) def test_MultiLocus_degenerate(): @@ -193,6 +321,6 @@ def test_MultiLocus_inverted_degenerate(): [ {"position": 0, "offset": -1, "region": "d"}, {"position": 1, "offset": 0, "region": "d"}, - {"position": 2, "offset": 1, "region": "d"} + {"position": 2, "offset": 1, "region": "d"}, ], ) From e23fe2abd02ebd8a2267f783cab225c2d7c10ebb Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Mon, 9 Mar 2026 13:41:45 +0100 Subject: [PATCH 023/127] Cleanup(crossmapper): on Genomic and NonCoding --- mutalyzer_crossmapper/crossmapper.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 9056ade..c90c15a 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -8,18 +8,18 @@ def coordinate_to_genomic(self, coordinate): :arg int coordinate: Coordinate. - :returns dict: Genomic position. + :returns dict: Genomic position model. """ return {"position": coordinate + 1} - def genomic_to_coordinate(self, position_m): + def genomic_to_coordinate(self, pos_m): """Convert a genomic position (g./m./o.) to a coordinate. - :arg int position: Genomic position model. + :arg dict position: Genomic position model. :returns int: Coordinate. """ - return position_m["position"] - 1 + return pos_m["position"] - 1 class NonCoding(Genomic): @@ -45,16 +45,16 @@ def coordinate_to_noncoding(self, coordinate): pos_m["position"] = pos_m["position"] + 1 return pos_m - def noncoding_to_coordinate(self, position_m): + def noncoding_to_coordinate(self, pos_m): """Convert a noncoding position (n./r.) to a coordinate. :arg dict postion_m: Noncoding position model. :returns int: Coordinate. 
""" - if position_m["region"] == "": - position_m["position"] = position_m["position"] - 1 - return self._noncoding.to_coordinate(position_m) + if pos_m["region"] == "": + pos_m["position"] = pos_m["position"] - 1 + return self._noncoding.to_coordinate(pos_m) class Coding(NonCoding): From 8f05341e9cde5b26c675bc4780be079ad5874038 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Mon, 9 Mar 2026 13:44:08 +0100 Subject: [PATCH 024/127] Cleanup --- mutalyzer_crossmapper/crossmapper.py | 4 ++-- mutalyzer_crossmapper/multi_locus.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index c90c15a..9762598 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -15,7 +15,7 @@ def coordinate_to_genomic(self, coordinate): def genomic_to_coordinate(self, pos_m): """Convert a genomic position (g./m./o.) to a coordinate. - :arg dict position: Genomic position model. + :arg dict pos_m: Genomic position model. :returns int: Coordinate. """ @@ -48,7 +48,7 @@ def coordinate_to_noncoding(self, coordinate): def noncoding_to_coordinate(self, pos_m): """Convert a noncoding position (n./r.) to a coordinate. - :arg dict postion_m: Noncoding position model. + :arg dict pos_m: Noncoding position model. :returns int: Coordinate. """ diff --git a/mutalyzer_crossmapper/multi_locus.py b/mutalyzer_crossmapper/multi_locus.py index 6a71cc4..2853ee6 100644 --- a/mutalyzer_crossmapper/multi_locus.py +++ b/mutalyzer_crossmapper/multi_locus.py @@ -91,7 +91,7 @@ def to_position(self, coordinate:int): def to_coordinate(self, pos_m:dict): """Convert a position model to a coordinate. - :arg dict position: Position model with 'position','offset' and 'region' keys. + :arg dict pos_m: Position model with 'position','offset' and 'region' keys. :returns int: Coordinate. 
""" From 09f721ff0c74cedab27410ddf0cba198cdf8a581 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Tue, 10 Mar 2026 09:56:23 +0100 Subject: [PATCH 025/127] Refactor degenerate tests --- tests/test_crossmapper.py | 114 +++++++++++++++++++++++++++++--------- 1 file changed, 88 insertions(+), 26 deletions(-) diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index e39225f..6ba2d83 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -65,7 +65,10 @@ def test_NonCoding_degenerate(): # Boundary between upstream and transcript. degenerate_equal( crossmap.noncoding_to_coordinate, 4, - [{"position": 1, "offset": 0, "region":"u"}]) + [ + {"position": 1, "offset": 0, "region":"u"}, + {"position": 0, "offset": -1, "region":"u"} + ]) # Boundary between downstream and transcript. degenerate_equal( @@ -234,7 +237,7 @@ def test_Coding_no_utr5(): # Direct transition from upstream to CDS. invariant( crossmap.coordinate_to_coding, 9, - crossmap.coding_to_coordinate, #(1, -1, 0, -1) + crossmap.coding_to_coordinate, {'position': 1, 'offset': 0, 'region': 'u'}) # serialize result : u1 invariant( crossmap.coordinate_to_coding, 10, @@ -249,7 +252,7 @@ def test_Coding_no_utr5_inverted(): # Direct transition from upstream to CDS. invariant( crossmap.coordinate_to_coding, 20, - crossmap.coding_to_coordinate, #(1, -1, 0, -1) + crossmap.coding_to_coordinate, {'position': 1, 'offset': 0, 'region': 'u'}) invariant( crossmap.coordinate_to_coding, 19, @@ -295,16 +298,16 @@ def test_Coding_small_utr5(): # Transition from upstream to 5' UTR to CDS. 
invariant( crossmap.coordinate_to_coding, 9, - crossmap.coding_to_coordinate, #(-1, -1, -1, -1) + crossmap.coding_to_coordinate, {'position': 1, 'offset': 0, 'region': 'u'}) invariant( crossmap.coordinate_to_coding, 10, - crossmap.coding_to_coordinate, #(-1, 0, -1, 0)) + crossmap.coding_to_coordinate, {'position': 1, 'offset': 0, 'region': '-'} ) invariant( crossmap.coordinate_to_coding, 11, - crossmap.coding_to_coordinate, #(1, 0, 0, 0)) + crossmap.coding_to_coordinate, {'position': 1, 'offset': 0, 'region': ''}) @@ -372,13 +375,29 @@ def test_Coding_degenerate(): degenerate_equal( crossmap.coding_to_coordinate, 9, [ - {'position': 1, 'offset': 8, 'region': 'u'}, - {'position': 8, 'offset': 1, 'region': 'u'}, - {'position': -1, 'offset': 10, 'region': 'u'} + {'position': 1, 'offset': 0, 'region': 'u'}, + {'position': 2, 'offset': 1, 'region': 'u'}, + {'position': 0, 'offset': -1, 'region': 'u'}, + {'position': 1, 'offset': -1, 'region': '-'}, + {'position': 2, 'offset': 0, 'region': '-'}, + {'position': 1, 'offset': -2, 'region': ''}, ]) degenerate_equal( crossmap.coding_to_coordinate, 20, - [(1, 1, 1, 1), (2, 0, 1, 1), (8, 2, 0, 1), (-1, 10, -1, 1)]) + [ + {'position': 1, 'offset': 0, 'region': 'd'}, + {'position': 8, 'offset': -7, 'region': 'd'}, + {'position': 0, 'offset': -1, 'region': 'd'}, + {'position': 2, 'offset': 0, 'region': '*'}, + {'position': 1, 'offset': 1, 'region': '*'}, + {'position': 8, 'offset': 2, 'region': ''}, + ] + ) + + +#TODO: Add tests for silently degenerate, +# position value <= 0 +# offset value > intron length def test_Coding_inverted_degenerate(): @@ -387,26 +406,46 @@ def test_Coding_inverted_degenerate(): degenerate_equal( crossmap.coding_to_coordinate, 20, - [(-1, -1, -1, -1), (-2, 0, -1, -1), (1, -2, 0, -1), (1, -10, 1, -1)]) + [ + {'position': 1, 'offset': 0, 'region': 'u'}, + {'position': 2, 'offset': 1, 'region': 'u'}, + {'position': 0, 'offset': -1, 'region': 'u'}, + {'position': 1, 'offset': -2, 'region': ''}, + 
{'position': 1, 'offset': -1, 'region': '-'}, + {'position': 2, 'offset': 0, 'region': '-'} + ] + ) degenerate_equal( crossmap.coding_to_coordinate, 9, - [(1, 1, 1, 1), (2, 0, 1, 1), (8, 2, 0, 1), (-1, 10, -1, 1)]) + [ + {'position': 1, 'offset': 0, 'region': 'd'}, + {'position': 2, 'offset': -1, 'region': 'd'}, + {'position': 1, 'offset': 1, 'region': '*'}, + {'position': 1, 'offset': 1, 'region': '*'}, + {'position': 10, 'offset': 0, 'region': ''}, + ] + ) def test_Coding_degenerate_return(): """Degenerate upstream and downstream positions may be returned.""" crossmap = Coding([(10, 20)], (11, 19)) - assert crossmap.coordinate_to_coding(9, True) == (-2, 0, -1, -1) - assert crossmap.coordinate_to_coding(20, True) == (2, 0, 1, 1) + for i in range(0, 30): + print(i, crossmap.coordinate_to_coding(i), crossmap.coordinate_to_coding(i, True)) + + assert crossmap.coordinate_to_coding(9, True) == {'position': 2, 'offset': 0, 'region': '-'} + assert crossmap.coordinate_to_coding(20, True) == {'position': 2, 'offset': 0, 'region': '*'} def test_Coding_inverted_degenerate_return(): """Degenerate upstream and downstream positions may be returned.""" crossmap = Coding([(10, 20)], (11, 19), True) + for i in range(0, 30): + print(i, crossmap.coordinate_to_coding(i), crossmap.coordinate_to_coding(i, True)) - assert crossmap.coordinate_to_coding(20, True) == (-2, 0, -1, -1) - assert crossmap.coordinate_to_coding(9, True) == (2, 0, 1, 1) + assert crossmap.coordinate_to_coding(20, True) == {'position': 2, 'offset': 0, 'region': '-'} + assert crossmap.coordinate_to_coding(9, True) == {'position': 2, 'offset': 0, 'region': '*'} def test_Coding_degenerate_no_return(): @@ -431,32 +470,55 @@ def test_Coding_no_utr_degenerate(): degenerate_equal( crossmap.coding_to_coordinate, 9, - [(1, -1, 0, -1), (-1, 0, -1, -1), (1, -2, 1, -1)]) + [ + {'position': 1, 'offset': 0, 'region': '-'}, + {'position': 1, 'offset': 0, 'region': 'u'}, + {'position': 1, 'offset': -1, 'region': ''}, + ] + ) 
degenerate_equal( crossmap.coding_to_coordinate, 11, - [(1, 1, 0, 1), (1, 0, 1, 1), (-1, 2, -1, 1)]) - + [ + {'position': 1, 'offset': 0, 'region': '*'}, + {'position': 1, 'offset': 0, 'region': 'd'}, + {'position': 1, 'offset': 1, 'region': ''} + ] + ) def test_Coding_inverted_no_utr_degenerate(): """UTRs may be missing.""" crossmap = Coding([(10, 11)], (10, 11), True) + # [(1, -1, 0, -1), (-1, 0, -1, -1), (1, -2, 1, -1)]) degenerate_equal( crossmap.coding_to_coordinate, 11, - [(1, -1, 0, -1), (-1, 0, -1, -1), (1, -2, 1, -1)]) + [ + {'position': 1, 'offset': 0, 'region': 'u'}, + {'position': 2, 'offset': 1, 'region': 'u'}, + {'position': 1, 'offset': 0, 'region': '-'}, + {'position': 1, 'offset': 0, 'region': '*'}, + ] +) degenerate_equal( crossmap.coding_to_coordinate, 9, - [(1, 1, 0, 1), (1, 0, 1, 1), (-1, 2, -1, 1)]) - + [ + {'position': 1, 'offset': 0, 'region': 'd'}, + {'position': 1, 'offset': 0, 'region': '*'}, + {'position': 1, 'offset': -1, 'region': ''}, + ] + ) def test_Coding_no_utr_degenerate_return(): """UTRs may be missing.""" crossmap = Coding([(10, 11)], (10, 11)) - assert crossmap.coordinate_to_coding(8, True) == (-2, 0, -1, -2) - assert crossmap.coordinate_to_coding(9, True) == (-1, 0, -1, -1) - assert crossmap.coordinate_to_coding(11, True) == (1, 0, 1, 1) - assert crossmap.coordinate_to_coding(12, True) == (2, 0, 1, 2) + print(crossmap.coordinate_to_coding(11), crossmap.coordinate_to_coding(11, True)) + print(crossmap.coordinate_to_coding(12), crossmap.coordinate_to_coding(12, True)) + + assert crossmap.coordinate_to_coding(8, True) == {'position': 2, 'offset': 0, 'region': '-'}#(-2, 0, -1, -2) + assert crossmap.coordinate_to_coding(9, True) == {'position': 1, 'offset': 0, 'region': '-'}#(-1, 0, -1, -1) + assert crossmap.coordinate_to_coding(11, True) == {'position': 1, 'offset': 0, 'region': '*'}#(1, 0, 1, 1) + assert crossmap.coordinate_to_coding(12, True) == {'position': 2, 'offset': 0, 'region': '*'}#(2, 0, 1, 2) def 
test_Coding_inverted_no_utr_degenerate_return(): From abead481a3df79ef50a378788f2a3b136f05aae7 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Tue, 10 Mar 2026 12:26:37 +0100 Subject: [PATCH 026/127] Add position model description for g.,n.,c.,and p. --- README.rst | 76 +++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 70 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index 061f91b..03088d3 100644 --- a/README.rst +++ b/README.rst @@ -41,13 +41,22 @@ resides on the complement strand. Please see ReadTheDocs_ for the latest documentation. - Quick start ----------- +The `Genomic` class provides an interface to conversions between genomic positions and coordinates. -The ``Genomic`` class provides an interface to conversions between genomic -positions and coordinates. +***Genomic Position Model*** +Genomic positions follow the HGVS ``g`` coordinate system. They are represented +as dictionaries: + +.. code:: json + + {"position": int} +Where: +- ``position`` is a positive integer + +***Genomic Position Conversion*** .. code:: python >>> from mutalyzer_crossmapper import Genomic @@ -61,6 +70,23 @@ On top of the functionality provided by the ``Genomic`` class, the ``NonCoding`` class provides an interface to conversions between noncoding positions and coordinates. +***NonCoding Position Model*** +Noncoding positions follow the HGVS `n` coordinate system. They are represented +as dictionaries: +.. code:: json + { + "position": int, + "offset": int, + "region": str + } +Where: +- `position` is a positive interger +- `offset` is an interger indicating the offset relative to the position (e.g., +negative for upstream or positive for downstream) +- `region` uses string describing the region type (empty string `""` for standard +noncoding positions, `"u"` for upstream and `"d"` for downstream.) + +***NonCoding Position Conversion*** .. 
code:: python >>> from mutalyzer_crossmapper import NonCoding @@ -71,13 +97,32 @@ positions and coordinates. >>> crossmap.noncoding_to_coordinate({"position": 14, "offset": 1, "region": ""}) 35 -Add the flag ``inverted=True`` to the constructor when the transcript resides +****Notes**** +- Add the flag ``inverted=True`` to the constructor when the transcript resides on the reverse complement strand. On top of the functionality provided by the ``NonCoding`` class, the ``Coding`` class provides an interface to conversions between coding positions and coordinates as well as conversions between protein positions and coordinates. +***Coding Position Model*** +Coding positions follow the HGVS `c`` coordinate system. They are represented as +dictionaries: +.. code:: json + { + "position": int, + "offset": int, + "region": str + } +Where: +- `position` is a positive interger +- `offset` is an interger indicating the offset relative to the position (e.g., +negative for upstream or positive for downstream) +- `region` uses string describing the region type (empty string `""` for standard +coding positions, `"-"` for 5' UTR, `"*"` for 3' UTR, `"u"` for upstream and `"d"` +for downstream.) + +***Coding Position Conversion*** .. code:: python >>> from mutalyzer_crossmapper import Coding @@ -88,11 +133,30 @@ coordinates as well as conversions between protein positions and coordinates. >>> crossmap.coding_to_coordinate({"position": -1, "offset": 0, "region": "-"}) 31 -Again, the flag ``inverted=True`` can be used for transcripts that reside on +****Notes**** +- Again, the flag ``inverted=True`` can be used for transcripts that reside on the reverse complement strand. -Conversions between protein positions and coordinates are done as follows. +***Protein Position Model*** +Protein positions follow the HGVS `p`` coordinate system. They are represented +as dictionaries: +.. 
code:: json +{ + "position": int, + "position_in_codon": int, + "offset": int, + "region": str +} +Where: +- **position**: the amino acid position (1-based) +- **position_in_codon**: the codon nucleotide index (1, 2, or 3) +- **offset**: an integer indicating offset relative to the codon +- **region**: a string describing the region type (empty string `""`` for standard positions) + +***Protein Position Conversion*** + +Conversions between protein positions and coordinates are done as follows. .. code:: python >>> crossmap.coordinate_to_protein(41) From b528f926333ce8297aff3e21de15f5e3415d32f3 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Tue, 10 Mar 2026 12:29:57 +0100 Subject: [PATCH 027/127] Format document --- README.rst | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/README.rst b/README.rst index 03088d3..2aff9d5 100644 --- a/README.rst +++ b/README.rst @@ -45,7 +45,7 @@ Quick start ----------- The `Genomic` class provides an interface to conversions between genomic positions and coordinates. -***Genomic Position Model*** +**Genomic Position Model** Genomic positions follow the HGVS ``g`` coordinate system. They are represented as dictionaries: @@ -56,7 +56,7 @@ as dictionaries: Where: - ``position`` is a positive integer -***Genomic Position Conversion*** +**Genomic Position Conversion** .. code:: python >>> from mutalyzer_crossmapper import Genomic @@ -70,7 +70,7 @@ On top of the functionality provided by the ``Genomic`` class, the ``NonCoding`` class provides an interface to conversions between noncoding positions and coordinates. -***NonCoding Position Model*** +**NonCoding Position Model** Noncoding positions follow the HGVS `n` coordinate system. They are represented as dictionaries: .. code:: json @@ -86,7 +86,7 @@ negative for upstream or positive for downstream) - `region` uses string describing the region type (empty string `""` for standard noncoding positions, `"u"` for upstream and `"d"` for downstream.) 
-***NonCoding Position Conversion*** +**NonCoding Position Conversion** .. code:: python >>> from mutalyzer_crossmapper import NonCoding @@ -97,7 +97,7 @@ noncoding positions, `"u"` for upstream and `"d"` for downstream.) >>> crossmap.noncoding_to_coordinate({"position": 14, "offset": 1, "region": ""}) 35 -****Notes**** +***Notes*** - Add the flag ``inverted=True`` to the constructor when the transcript resides on the reverse complement strand. @@ -105,7 +105,7 @@ On top of the functionality provided by the ``NonCoding`` class, the ``Coding`` class provides an interface to conversions between coding positions and coordinates as well as conversions between protein positions and coordinates. -***Coding Position Model*** +**Coding Position Model** Coding positions follow the HGVS `c`` coordinate system. They are represented as dictionaries: .. code:: json @@ -122,7 +122,7 @@ negative for upstream or positive for downstream) coding positions, `"-"` for 5' UTR, `"*"` for 3' UTR, `"u"` for upstream and `"d"` for downstream.) -***Coding Position Conversion*** +**Coding Position Conversion** .. code:: python >>> from mutalyzer_crossmapper import Coding @@ -133,12 +133,12 @@ for downstream.) >>> crossmap.coding_to_coordinate({"position": -1, "offset": 0, "region": "-"}) 31 -****Notes**** +***Notes*** - Again, the flag ``inverted=True`` can be used for transcripts that reside on the reverse complement strand. -***Protein Position Model*** +**Protein Position Model** Protein positions follow the HGVS `p`` coordinate system. They are represented as dictionaries: .. code:: json @@ -154,7 +154,7 @@ Where: - **offset**: an integer indicating offset relative to the codon - **region**: a string describing the region type (empty string `""`` for standard positions) -***Protein Position Conversion*** +**Protein Position Conversion** Conversions between protein positions and coordinates are done as follows. .. 
code:: python From 6c1b52ed17a947f8565c4e9d48b9ad71663a4783 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Tue, 10 Mar 2026 12:34:40 +0100 Subject: [PATCH 028/127] Format document --- README.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index 2aff9d5..3d58a55 100644 --- a/README.rst +++ b/README.rst @@ -54,7 +54,7 @@ as dictionaries: {"position": int} Where: -- ``position`` is a positive integer +- **position**: a positive integer **Genomic Position Conversion** .. code:: python @@ -80,10 +80,10 @@ as dictionaries: "region": str } Where: -- `position` is a positive interger -- `offset` is an interger indicating the offset relative to the position (e.g., +- **position**: a positive interger +- **offset**: an interger indicating the offset relative to the position (e.g., negative for upstream or positive for downstream) -- `region` uses string describing the region type (empty string `""` for standard +- **region**: a string describing the region type (empty string `""` for standard noncoding positions, `"u"` for upstream and `"d"` for downstream.) **NonCoding Position Conversion** @@ -115,10 +115,10 @@ dictionaries: "region": str } Where: -- `position` is a positive interger -- `offset` is an interger indicating the offset relative to the position (e.g., +- **position**: a positive interger +- **offset**: an interger indicating the offset relative to the position (e.g., negative for upstream or positive for downstream) -- `region` uses string describing the region type (empty string `""` for standard +- **region**: a string describing the region type (empty string `""` for standard coding positions, `"-"` for 5' UTR, `"*"` for 3' UTR, `"u"` for upstream and `"d"` for downstream.) 
From b87160189983184859c90c8833811340d306baeb Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Tue, 10 Mar 2026 12:40:34 +0100 Subject: [PATCH 029/127] Format document --- README.rst | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index 3d58a55..cb9198a 100644 --- a/README.rst +++ b/README.rst @@ -54,6 +54,7 @@ as dictionaries: {"position": int} Where: + - **position**: a positive integer **Genomic Position Conversion** @@ -71,8 +72,7 @@ On top of the functionality provided by the ``Genomic`` class, the positions and coordinates. **NonCoding Position Model** -Noncoding positions follow the HGVS `n` coordinate system. They are represented -as dictionaries: +Noncoding positions follow the HGVS `n` coordinate system. They are represented as dictionaries: .. code:: json { "position": int, @@ -80,6 +80,7 @@ as dictionaries: "region": str } Where: + - **position**: a positive interger - **offset**: an interger indicating the offset relative to the position (e.g., negative for upstream or positive for downstream) @@ -97,7 +98,7 @@ noncoding positions, `"u"` for upstream and `"d"` for downstream.) >>> crossmap.noncoding_to_coordinate({"position": 14, "offset": 1, "region": ""}) 35 -***Notes*** +**Notes** - Add the flag ``inverted=True`` to the constructor when the transcript resides on the reverse complement strand. @@ -115,6 +116,7 @@ dictionaries: "region": str } Where: + - **position**: a positive interger - **offset**: an interger indicating the offset relative to the position (e.g., negative for upstream or positive for downstream) @@ -133,10 +135,8 @@ for downstream.) >>> crossmap.coding_to_coordinate({"position": -1, "offset": 0, "region": "-"}) 31 -***Notes*** -- Again, the flag ``inverted=True`` can be used for transcripts that reside on -the reverse complement strand. - +**Notes** +- Again, the flag ``inverted=True`` can be used for transcripts that reside on the reverse complement strand. 
**Protein Position Model** Protein positions follow the HGVS `p`` coordinate system. They are represented @@ -149,6 +149,7 @@ as dictionaries: "region": str } Where: + - **position**: the amino acid position (1-based) - **position_in_codon**: the codon nucleotide index (1, 2, or 3) - **offset**: an integer indicating offset relative to the codon From 9e91d1dd91b0168e8e51c7576db7f3036a9ffccb Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Tue, 10 Mar 2026 13:50:43 +0100 Subject: [PATCH 030/127] Format document in .rst style --- README.rst | 167 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 96 insertions(+), 71 deletions(-) diff --git a/README.rst b/README.rst index cb9198a..cf51664 100644 --- a/README.rst +++ b/README.rst @@ -41,24 +41,30 @@ resides on the complement strand. Please see ReadTheDocs_ for the latest documentation. -Quick start ------------ -The `Genomic` class provides an interface to conversions between genomic positions and coordinates. +Quick Start +=========== -**Genomic Position Model** -Genomic positions follow the HGVS ``g`` coordinate system. They are represented -as dictionaries: +The ``Genomic`` class provides an interface for conversions between genomic positions and coordinates. -.. code:: json +Genomic Position Model +--------------------- - {"position": int} +Genomic positions follow the HGVS ``g`` coordinate system. They are represented as dictionaries: + +.. code-block:: json + + { + "position": int + } Where: - **position**: a positive integer -**Genomic Position Conversion** -.. code:: python +Genomic Position Conversion +-------------------------- + +.. code-block:: python >>> from mutalyzer_crossmapper import Genomic >>> crossmap = Genomic() @@ -67,28 +73,34 @@ Where: >>> crossmap.genomic_to_coordinate({"position": 1}) 0 -On top of the functionality provided by the ``Genomic`` class, the -``NonCoding`` class provides an interface to conversions between noncoding -positions and coordinates. 
- -**NonCoding Position Model** -Noncoding positions follow the HGVS `n` coordinate system. They are represented as dictionaries: -.. code:: json - { - "position": int, - "offset": int, - "region": str - } +NonCoding Class +--------------- + +The ``NonCoding`` class provides conversions between noncoding positions and coordinates. + +NonCoding Position Model +~~~~~~~~~~~~~~~~~~~~~~~ + +Noncoding positions follow the HGVS ``n`` coordinate system. They are represented as dictionaries: + +.. code-block:: json + + { + "position": int, + "offset": int, + "region": str + } + Where: -- **position**: a positive interger -- **offset**: an interger indicating the offset relative to the position (e.g., -negative for upstream or positive for downstream) -- **region**: a string describing the region type (empty string `""` for standard -noncoding positions, `"u"` for upstream and `"d"` for downstream.) +- **position**: a positive integer +- **offset**: an integer indicating the offset relative to the position (negative for upstream, positive for downstream) +- **region**: a string describing the region type (``""`` for standard, ``"u"`` for upstream, ``"d"`` for downstream) + +NonCoding Position Conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -**NonCoding Position Conversion** -.. code:: python +.. code-block:: python >>> from mutalyzer_crossmapper import NonCoding >>> exons = [(5, 8), (14, 20), (30, 35), (40, 44), (50, 52), (70, 72)] @@ -98,34 +110,39 @@ noncoding positions, `"u"` for upstream and `"d"` for downstream.) >>> crossmap.noncoding_to_coordinate({"position": 14, "offset": 1, "region": ""}) 35 -**Notes** -- Add the flag ``inverted=True`` to the constructor when the transcript resides -on the reverse complement strand. - -On top of the functionality provided by the ``NonCoding`` class, the ``Coding`` -class provides an interface to conversions between coding positions and -coordinates as well as conversions between protein positions and coordinates. 
- -**Coding Position Model** -Coding positions follow the HGVS `c`` coordinate system. They are represented as -dictionaries: -.. code:: json - { - "position": int, - "offset": int, - "region": str - } +Notes +~~~~~ + +- Add the flag ``inverted=True`` to the constructor when the transcript resides on the reverse complement strand. + +Coding Class +------------ + +The ``Coding`` class provides conversions between coding positions and coordinates, as well as protein positions. + +Coding Position Model +~~~~~~~~~~~~~~~~~~~~ + +Coding positions follow the HGVS ``c`` coordinate system. They are represented as dictionaries: + +.. code-block:: json + + { + "position": int, + "offset": int, + "region": str + } + Where: -- **position**: a positive interger -- **offset**: an interger indicating the offset relative to the position (e.g., -negative for upstream or positive for downstream) -- **region**: a string describing the region type (empty string `""` for standard -coding positions, `"-"` for 5' UTR, `"*"` for 3' UTR, `"u"` for upstream and `"d"` -for downstream.) +- **position**: a positive integer +- **offset**: an integer indicating the offset relative to the position +- **region**: a string describing the region type (``""`` for standard coding positions, ``"-"`` for 5' UTR, ``"*"`` for 3' UTR, ``"u"`` for upstream, ``"d"`` for downstream) + +Coding Position Conversion +~~~~~~~~~~~~~~~~~~~~~~~~~ -**Coding Position Conversion** -.. code:: python +.. code-block:: python >>> from mutalyzer_crossmapper import Coding >>> cds = (32, 43) @@ -135,30 +152,38 @@ for downstream.) >>> crossmap.coding_to_coordinate({"position": -1, "offset": 0, "region": "-"}) 31 -**Notes** -- Again, the flag ``inverted=True`` can be used for transcripts that reside on the reverse complement strand. - -**Protein Position Model** -Protein positions follow the HGVS `p`` coordinate system. They are represented -as dictionaries: -.. 
code:: json -{ - "position": int, - "position_in_codon": int, - "offset": int, - "region": str -} +Notes +~~~~~ + +- The flag ``inverted=True`` can be used for transcripts on the reverse complement strand. + +Protein Position Model +--------------------- + +Protein positions follow the HGVS ``p`` coordinate system. They are represented as dictionaries: + +.. code-block:: json + + { + "position": int, + "position_in_codon": int, + "offset": int, + "region": str + } + Where: - **position**: the amino acid position (1-based) - **position_in_codon**: the codon nucleotide index (1, 2, or 3) - **offset**: an integer indicating offset relative to the codon -- **region**: a string describing the region type (empty string `""`` for standard positions) +- **region**: a string describing the region type (``""`` for standard positions) + +Protein Position Conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~ -**Protein Position Conversion** +Conversions between protein positions and coordinates: -Conversions between protein positions and coordinates are done as follows. -.. code:: python +.. code-block:: python >>> crossmap.coordinate_to_protein(41) {"position": 2, "position_in_codon": 2, "offset": 1, "region": ""} From 35359b8916925356449fb8a084b0060db0e37eaa Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Tue, 10 Mar 2026 14:05:21 +0100 Subject: [PATCH 031/127] Format document in .rst style --- README.rst | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index cf51664..630e9c1 100644 --- a/README.rst +++ b/README.rst @@ -44,10 +44,13 @@ Please see ReadTheDocs_ for the latest documentation. Quick Start =========== +Genomic Class +------------- + The ``Genomic`` class provides an interface for conversions between genomic positions and coordinates. Genomic Position Model ---------------------- +~~~~~~~~~~~~~~~~~~~~~~~ Genomic positions follow the HGVS ``g`` coordinate system. 
They are represented as dictionaries: @@ -58,11 +61,10 @@ Genomic positions follow the HGVS ``g`` coordinate system. They are represented } Where: - - **position**: a positive integer Genomic Position Conversion --------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: python @@ -92,7 +94,6 @@ Noncoding positions follow the HGVS ``n`` coordinate system. They are represente } Where: - - **position**: a positive integer - **offset**: an integer indicating the offset relative to the position (negative for upstream, positive for downstream) - **region**: a string describing the region type (``""`` for standard, ``"u"`` for upstream, ``"d"`` for downstream) @@ -134,7 +135,6 @@ Coding positions follow the HGVS ``c`` coordinate system. They are represented a } Where: - - **position**: a positive integer - **offset**: an integer indicating the offset relative to the position - **region**: a string describing the region type (``""`` for standard coding positions, ``"-"`` for 5' UTR, ``"*"`` for 3' UTR, ``"u"`` for upstream, ``"d"`` for downstream) @@ -157,8 +157,11 @@ Notes - The flag ``inverted=True`` can be used for transcripts on the reverse complement strand. +Protein +------- + Protein Position Model ---------------------- +~~~~~~~~~~~~~~~~~~~~~~ Protein positions follow the HGVS ``p`` coordinate system. 
They are represented as dictionaries: @@ -179,7 +182,7 @@ Where: - **region**: a string describing the region type (``""`` for standard positions) Protein Position Conversion -~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Conversions between protein positions and coordinates: From d48f99d4a1a152d535ffb3b4ff2330815c06e4db Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Tue, 10 Mar 2026 14:14:21 +0100 Subject: [PATCH 032/127] Format and add an example --- README.rst | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index 630e9c1..73cd6a2 100644 --- a/README.rst +++ b/README.rst @@ -44,6 +44,14 @@ Please see ReadTheDocs_ for the latest documentation. Quick Start =========== +An example below uses the following transcript data: + +.. code-block:: python + + >>>_exons = [(5, 8), (14, 20), (30, 35), (40, 44), (50, 52), (70, 72)] + >>>_cds = (32, 43) + + Genomic Class ------------- @@ -61,6 +69,7 @@ Genomic positions follow the HGVS ``g`` coordinate system. They are represented } Where: + - **position**: a positive integer Genomic Position Conversion @@ -94,6 +103,7 @@ Noncoding positions follow the HGVS ``n`` coordinate system. They are represente } Where: + - **position**: a positive integer - **offset**: an integer indicating the offset relative to the position (negative for upstream, positive for downstream) - **region**: a string describing the region type (``""`` for standard, ``"u"`` for upstream, ``"d"`` for downstream) @@ -104,8 +114,7 @@ NonCoding Position Conversion .. 
code-block:: python >>> from mutalyzer_crossmapper import NonCoding - >>> exons = [(5, 8), (14, 20), (30, 35), (40, 44), (50, 52), (70, 72)] - >>> crossmap = NonCoding(exons) + >>> crossmap = NonCoding(_exons) >>> crossmap.coordinate_to_noncoding(35) {"position": 14, "offset": 1, "region": ""} >>> crossmap.noncoding_to_coordinate({"position": 14, "offset": 1, "region": ""}) @@ -135,9 +144,10 @@ Coding positions follow the HGVS ``c`` coordinate system. They are represented a } Where: + - **position**: a positive integer - **offset**: an integer indicating the offset relative to the position -- **region**: a string describing the region type (``""`` for standard coding positions, ``"-"`` for 5' UTR, ``"*"`` for 3' UTR, ``"u"`` for upstream, ``"d"`` for downstream) +- **region**: a string describing the region type (``""`` for standard coding positions, ``"-"`` for 5' UTR, ``"*"`` for 3' UTR, ``"u"`` for upstream and ``"d"`` for downstream) Coding Position Conversion ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -145,8 +155,7 @@ Coding Position Conversion .. 
code-block:: python >>> from mutalyzer_crossmapper import Coding - >>> cds = (32, 43) - >>> crossmap = Coding(exons, cds) + >>> crossmap = Coding(_exons, _cds) >>> crossmap.coordinate_to_coding(31) {"position": -1, "offset": 0, "region": "-"} >>> crossmap.coding_to_coordinate({"position": -1, "offset": 0, "region": "-"}) From d16278bf1938fb965c331faae541aabf0a94c406 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Tue, 10 Mar 2026 14:23:39 +0100 Subject: [PATCH 033/127] Add table for Genomic positions and coordinate mapping --- README.rst | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/README.rst b/README.rst index 73cd6a2..6a3f600 100644 --- a/README.rst +++ b/README.rst @@ -84,6 +84,51 @@ Genomic Position Conversion >>> crossmap.genomic_to_coordinate({"position": 1}) 0 +Here is the mapping of coordinates to genomic positions: ++------------+----------+ +| Coordinate | Position | ++============+==========+ +| 0 | 1 | ++------------+----------+ +| 1 | 2 | ++------------+----------+ +| 2 | 3 | ++------------+----------+ +| 3 | 4 | ++------------+----------+ +| 4 | 5 | ++------------+----------+ +| 5 | 6 | ++------------+----------+ +| 6 | 7 | ++------------+----------+ +| 7 | 8 | ++------------+----------+ +| 8 | 9 | ++------------+----------+ +| 9 | 10 | ++------------+----------+ +| 10 | 11 | ++------------+----------+ +| 11 | 12 | ++------------+----------+ +| 12 | 13 | ++------------+----------+ +| 13 | 14 | ++------------+----------+ +| 14 | 15 | ++------------+----------+ +| 15 | 16 | ++------------+----------+ +| 16 | 17 | ++------------+----------+ +| 17 | 18 | ++------------+----------+ +| 18 | 19 | ++------------+----------+ +| 19 | 20 | ++------------+----------+ + NonCoding Class --------------- From b29309a5a6edeee544c36980e678fd910db59fa7 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Tue, 10 Mar 2026 14:25:09 +0100 Subject: [PATCH 034/127] Format table --- README.rst | 21 +-------------------- 1 file 
changed, 1 insertion(+), 20 deletions(-) diff --git a/README.rst b/README.rst index 6a3f600..c84eade 100644 --- a/README.rst +++ b/README.rst @@ -83,49 +83,30 @@ Genomic Position Conversion {"position": 1} >>> crossmap.genomic_to_coordinate({"position": 1}) 0 - Here is the mapping of coordinates to genomic positions: + +------------+----------+ | Coordinate | Position | +============+==========+ | 0 | 1 | -+------------+----------+ | 1 | 2 | -+------------+----------+ | 2 | 3 | -+------------+----------+ | 3 | 4 | -+------------+----------+ | 4 | 5 | -+------------+----------+ | 5 | 6 | -+------------+----------+ | 6 | 7 | -+------------+----------+ | 7 | 8 | -+------------+----------+ | 8 | 9 | -+------------+----------+ | 9 | 10 | -+------------+----------+ | 10 | 11 | -+------------+----------+ | 11 | 12 | -+------------+----------+ | 12 | 13 | -+------------+----------+ | 13 | 14 | -+------------+----------+ | 14 | 15 | -+------------+----------+ | 15 | 16 | -+------------+----------+ | 16 | 17 | -+------------+----------+ | 17 | 18 | -+------------+----------+ | 18 | 19 | -+------------+----------+ | 19 | 20 | +------------+----------+ From d1c4c2ff2190433f34916fb017264df9af461148 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Tue, 10 Mar 2026 14:37:26 +0100 Subject: [PATCH 035/127] Format table --- README.rst | 64 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/README.rst b/README.rst index c84eade..f68c424 100644 --- a/README.rst +++ b/README.rst @@ -84,31 +84,45 @@ Genomic Position Conversion >>> crossmap.genomic_to_coordinate({"position": 1}) 0 Here is the mapping of coordinates to genomic positions: - -+------------+----------+ -| Coordinate | Position | -+============+==========+ -| 0 | 1 | -| 1 | 2 | -| 2 | 3 | -| 3 | 4 | -| 4 | 5 | -| 5 | 6 | -| 6 | 7 | -| 7 | 8 | -| 8 | 9 | -| 9 | 10 | -| 10 | 11 | -| 11 | 12 | -| 12 | 13 | -| 13 | 14 | -| 14 | 15 | -| 15 | 16 | -| 16 | 17 | -| 17 
| 18 | -| 18 | 19 | -| 19 | 20 | -+------------+----------+ +Here are example mappings for the transcript: + +.. note:: + + These examples use the following data: + + .. code-block:: python + + _exons = [(5, 8), (14, 20), (30, 35), (40, 44), (50, 52), (70, 72)] + _cds = (32, 43) + +--- + +Genomic Positions +================= + +.. csv-table:: Coordinate to Genomic Position + :header: "Coordinate", "Position" + + 0, 1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9, 10 + 10, 11 + 11, 12 + 12, 13 + 13, 14 + 14, 15 + 15, 16 + 16, 17 + 17, 18 + 18, 19 + 19, 20 NonCoding Class --------------- From 7966a14d10d0c87cfd1de803da70f26e0ee26dc3 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Tue, 10 Mar 2026 14:40:39 +0100 Subject: [PATCH 036/127] Format table --- README.rst | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/README.rst b/README.rst index f68c424..6b67229 100644 --- a/README.rst +++ b/README.rst @@ -84,21 +84,9 @@ Genomic Position Conversion >>> crossmap.genomic_to_coordinate({"position": 1}) 0 Here is the mapping of coordinates to genomic positions: -Here are example mappings for the transcript: +.. raw:: html -.. note:: - - These examples use the following data: - - .. code-block:: python - - _exons = [(5, 8), (14, 20), (30, 35), (40, 44), (50, 52), (70, 72)] - _cds = (32, 43) - ---- - -Genomic Positions -================= +
.. csv-table:: Coordinate to Genomic Position :header: "Coordinate", "Position" @@ -124,6 +112,10 @@ Genomic Positions 18, 19 19, 20 +.. raw:: html + +
+ NonCoding Class --------------- From 035c479aa2500310445655c2ef55c8cd13650c77 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Tue, 10 Mar 2026 14:59:23 +0100 Subject: [PATCH 037/127] Add tables --- README.rst | 197 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 174 insertions(+), 23 deletions(-) diff --git a/README.rst b/README.rst index 6b67229..4f6f084 100644 --- a/README.rst +++ b/README.rst @@ -84,11 +84,8 @@ Genomic Position Conversion >>> crossmap.genomic_to_coordinate({"position": 1}) 0 Here is the mapping of coordinates to genomic positions: -.. raw:: html -
- -.. csv-table:: Coordinate to Genomic Position +.. csv-table:: Coordinate to Genomic Position (0-4) :header: "Coordinate", "Position" 0, 1 @@ -96,25 +93,7 @@ Here is the mapping of coordinates to genomic positions: 2, 3 3, 4 4, 5 - 5, 6 - 6, 7 - 7, 8 - 8, 9 - 9, 10 - 10, 11 - 11, 12 - 12, 13 - 13, 14 - 14, 15 - 15, 16 - 16, 17 - 17, 18 - 18, 19 - 19, 20 - -.. raw:: html - -
+ ... NonCoding Class --------------- @@ -157,6 +136,92 @@ Notes - Add the flag ``inverted=True`` to the constructor when the transcript resides on the reverse complement strand. +Here is the mapping of coordinates to noncoding positions: +.. csv-table:: Coordinate Mapping + :header: "Coordinate", "Position", "Offset", "Region" + + 0, 5, 0, u + 1, 4, 0, u + 2, 3, 0, u + 3, 2, 0, u + 4, 1, 0, u + 5, 1, 0, + 6, 2, 0, + 7, 3, 0, + 8, 3, 1, + 9, 3, 2, + 10, 3, 3, + 11, 4, -3, + 12, 4, -2, + 13, 4, -1, + 14, 4, 0, + 15, 5, 0, + 16, 6, 0, + 17, 7, 0, + 18, 8, 0, + 19, 9, 0, + 20, 9, 1, + 21, 9, 2, + 22, 9, 3, + 23, 9, 4, + 24, 9, 5, + 25, 10, -5, + 26, 10, -4, + 27, 10, -3, + 28, 10, -2, + 29, 10, -1, + 30, 10, 0, + 31, 11, 0, + 32, 12, 0, + 33, 13, 0, + 34, 14, 0, + 35, 14, 1, + 36, 14, 2, + 37, 14, 3, + 38, 15, -2, + 39, 15, -1, + 40, 15, 0, + 41, 16, 0, + 42, 17, 0, + 43, 18, 0, + 44, 18, 1, + 45, 18, 2, + 46, 18, 3, + 47, 19, -3, + 48, 19, -2, + 49, 19, -1, + 50, 19, 0, + 51, 20, 0, + 52, 20, 1, + 53, 20, 2, + 54, 20, 3, + 55, 20, 4, + 56, 20, 5, + 57, 20, 6, + 58, 20, 7, + 59, 20, 8, + 60, 20, 9, + 61, 21, -9, + 62, 21, -8, + 63, 21, -7, + 64, 21, -6, + 65, 21, -5, + 66, 21, -4, + 67, 21, -3, + 68, 21, -2, + 69, 21, -1, + 70, 21, 0, + 71, 22, 0, + 72, 1, 0, d + 73, 2, 0, d + 74, 3, 0, d + 75, 4, 0, d + 76, 5, 0, d + 77, 6, 0, d + 78, 7, 0, d + 79, 8, 0, d + + Coding Class ------------ @@ -198,6 +263,92 @@ Notes - The flag ``inverted=True`` can be used for transcripts on the reverse complement strand. +Here is the mapping of coordinates to coding positions: + +.. 
csv-table:: Coordinate Mapping + :header: "Coordinate", "Position", "Offset", "Region" + + 0, 5, 0, u + 1, 4, 0, u + 2, 3, 0, u + 3, 2, 0, u + 4, 1, 0, u + 5, 11, 0, - + 6, 10, 0, - + 7, 9, 0, - + 8, 9, 1, - + 9, 9, 2, - + 10, 9, 3, - + 11, 8, -3, - + 12, 8, -2, - + 13, 8, -1, - + 14, 8, 0, - + 15, 7, 0, - + 16, 6, 0, - + 17, 5, 0, - + 18, 4, 0, - + 19, 3, 0, - + 20, 3, 1, - + 21, 3, 2, - + 22, 3, 3, - + 23, 3, 4, - + 24, 3, 5, - + 25, 2, -5, - + 26, 2, -4, - + 27, 2, -3, - + 28, 2, -2, - + 29, 2, -1, - + 30, 2, 0, - + 31, 1, 0, - + 32, 1, 0, + 33, 2, 0, + 34, 3, 0, + 35, 3, 1, + 36, 3, 2, + 37, 3, 3, + 38, 4, -2, + 39, 4, -1, + 40, 4, 0, + 41, 5, 0, + 42, 6, 0, + 43, 1, 0, * + 44, 1, 1, * + 45, 1, 2, * + 46, 1, 3, * + 47, 2, -3, * + 48, 2, -2, * + 49, 2, -1, * + 50, 2, 0, * + 51, 3, 0, * + 52, 3, 1, * + 53, 3, 2, * + 54, 3, 3, * + 55, 3, 4, * + 56, 3, 5, * + 57, 3, 6, * + 58, 3, 7, * + 59, 3, 8, * + 60, 3, 9, * + 61, 4, -9, * + 62, 4, -8, * + 63, 4, -7, * + 64, 4, -6, * + 65, 4, -5, * + 66, 4, -4, * + 67, 4, -3, * + 68, 4, -2, * + 69, 4, -1, * + 70, 4, 0, * + 71, 5, 0, * + 72, 1, 0, d + 73, 2, 0, d + 74, 3, 0, d + 75, 4, 0, d + 76, 5, 0, d + 77, 6, 0, d + 78, 7, 0, d + 79, 8, 0, d + Protein ------- From f116376749732c8546f7727b889cc87aaff50548 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Tue, 10 Mar 2026 15:04:51 +0100 Subject: [PATCH 038/127] Fix table syntax --- README.rst | 135 +++++++++++++++++++++++++++-------------------------- 1 file changed, 68 insertions(+), 67 deletions(-) diff --git a/README.rst b/README.rst index 4f6f084..1783078 100644 --- a/README.rst +++ b/README.rst @@ -137,6 +137,7 @@ Notes - Add the flag ``inverted=True`` to the constructor when the transcript resides on the reverse complement strand. Here is the mapping of coordinates to noncoding positions: + .. 
csv-table:: Coordinate Mapping :header: "Coordinate", "Position", "Offset", "Region" @@ -145,73 +146,73 @@ Here is the mapping of coordinates to noncoding positions: 2, 3, 0, u 3, 2, 0, u 4, 1, 0, u - 5, 1, 0, - 6, 2, 0, - 7, 3, 0, - 8, 3, 1, - 9, 3, 2, - 10, 3, 3, - 11, 4, -3, - 12, 4, -2, - 13, 4, -1, - 14, 4, 0, - 15, 5, 0, - 16, 6, 0, - 17, 7, 0, - 18, 8, 0, - 19, 9, 0, - 20, 9, 1, - 21, 9, 2, - 22, 9, 3, - 23, 9, 4, - 24, 9, 5, - 25, 10, -5, - 26, 10, -4, - 27, 10, -3, - 28, 10, -2, - 29, 10, -1, - 30, 10, 0, - 31, 11, 0, - 32, 12, 0, - 33, 13, 0, - 34, 14, 0, - 35, 14, 1, - 36, 14, 2, - 37, 14, 3, - 38, 15, -2, - 39, 15, -1, - 40, 15, 0, - 41, 16, 0, - 42, 17, 0, - 43, 18, 0, - 44, 18, 1, - 45, 18, 2, - 46, 18, 3, - 47, 19, -3, - 48, 19, -2, - 49, 19, -1, - 50, 19, 0, - 51, 20, 0, - 52, 20, 1, - 53, 20, 2, - 54, 20, 3, - 55, 20, 4, - 56, 20, 5, - 57, 20, 6, - 58, 20, 7, - 59, 20, 8, - 60, 20, 9, - 61, 21, -9, - 62, 21, -8, - 63, 21, -7, - 64, 21, -6, - 65, 21, -5, - 66, 21, -4, - 67, 21, -3, - 68, 21, -2, - 69, 21, -1, - 70, 21, 0, - 71, 22, 0, + 5, 1, 0, "" + 6, 2, 0, "" + 7, 3, 0, "" + 8, 3, 1, "" + 9, 3, 2, "" + 10, 3, 3, "" + 11, 4, -3, "" + 12, 4, -2, "" + 13, 4, -1, "" + 14, 4, 0, "" + 15, 5, 0, "" + 16, 6, 0, "" + 17, 7, 0, "" + 18, 8, 0, "" + 19, 9, 0, "" + 20, 9, 1, "" + 21, 9, 2, "" + 22, 9, 3, "" + 23, 9, 4, "" + 24, 9, 5, "" + 25, 10, -5, "" + 26, 10, -4, "" + 27, 10, -3, "" + 28, 10, -2, "" + 29, 10, -1, "" + 30, 10, 0, "" + 31, 11, 0, "" + 32, 12, 0, "" + 33, 13, 0, "" + 34, 14, 0, "" + 35, 14, 1, "" + 36, 14, 2, "" + 37, 14, 3, "" + 38, 15, -2, "" + 39, 15, -1, "" + 40, 15, 0, "" + 41, 16, 0, "" + 42, 17, 0, "" + 43, 18, 0, "" + 44, 18, 1, "" + 45, 18, 2, "" + 46, 18, 3, "" + 47, 19, -3, "" + 48, 19, -2, "" + 49, 19, -1, "" + 50, 19, 0, "" + 51, 20, 0, "" + 52, 20, 1, "" + 53, 20, 2, "" + 54, 20, 3, "" + 55, 20, 4, "" + 56, 20, 5, "" + 57, 20, 6, "" + 58, 20, 7, "" + 59, 20, 8, "" + 60, 20, 9, "" + 61, 21, -9, "" + 62, 21, -8, "" + 63, 21, -7, 
"" + 64, 21, -6, "" + 65, 21, -5, "" + 66, 21, -4, "" + 67, 21, -3, "" + 68, 21, -2, "" + 69, 21, -1, "" + 70, 21, 0, "" + 71, 22, 0, "" 72, 1, 0, d 73, 2, 0, d 74, 3, 0, d From 89787e6b0662bd544d77554fba30107df06e5d19 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Tue, 10 Mar 2026 15:20:37 +0100 Subject: [PATCH 039/127] Fix table syntax --- README.rst | 134 ++++++++++++++++++++++++++--------------------------- 1 file changed, 67 insertions(+), 67 deletions(-) diff --git a/README.rst b/README.rst index 1783078..3db86a5 100644 --- a/README.rst +++ b/README.rst @@ -274,73 +274,73 @@ Here is the mapping of coordinates to coding positions: 2, 3, 0, u 3, 2, 0, u 4, 1, 0, u - 5, 11, 0, - - 6, 10, 0, - - 7, 9, 0, - - 8, 9, 1, - - 9, 9, 2, - - 10, 9, 3, - - 11, 8, -3, - - 12, 8, -2, - - 13, 8, -1, - - 14, 8, 0, - - 15, 7, 0, - - 16, 6, 0, - - 17, 5, 0, - - 18, 4, 0, - - 19, 3, 0, - - 20, 3, 1, - - 21, 3, 2, - - 22, 3, 3, - - 23, 3, 4, - - 24, 3, 5, - - 25, 2, -5, - - 26, 2, -4, - - 27, 2, -3, - - 28, 2, -2, - - 29, 2, -1, - - 30, 2, 0, - - 31, 1, 0, - - 32, 1, 0, - 33, 2, 0, - 34, 3, 0, - 35, 3, 1, - 36, 3, 2, - 37, 3, 3, - 38, 4, -2, - 39, 4, -1, - 40, 4, 0, - 41, 5, 0, - 42, 6, 0, - 43, 1, 0, * - 44, 1, 1, * - 45, 1, 2, * - 46, 1, 3, * - 47, 2, -3, * - 48, 2, -2, * - 49, 2, -1, * - 50, 2, 0, * - 51, 3, 0, * - 52, 3, 1, * - 53, 3, 2, * - 54, 3, 3, * - 55, 3, 4, * - 56, 3, 5, * - 57, 3, 6, * - 58, 3, 7, * - 59, 3, 8, * - 60, 3, 9, * - 61, 4, -9, * - 62, 4, -8, * - 63, 4, -7, * - 64, 4, -6, * - 65, 4, -5, * - 66, 4, -4, * - 67, 4, -3, * - 68, 4, -2, * - 69, 4, -1, * - 70, 4, 0, * - 71, 5, 0, * + 5, 11, 0, "-" + 6, 10, 0, "-" + 7, 9, 0, "-" + 8, 9, 1, "-" + 9, 9, 2, "-" + 10, 9, 3, "-" + 11, 8, -3, "-" + 12, 8, -2, "-" + 13, 8, -1, "-" + 14, 8, 0, "-" + 15, 7, 0, "-" + 16, 6, 0, "-" + 17, 5, 0, "-" + 18, 4, 0, "-" + 19, 3, 0, "-" + 20, 3, 1, "-" + 21, 3, 2, "-" + 22, 3, 3, "-" + 23, 3, 4, "-" + 24, 3, 5, "-" + 25, 2, -5, "-" + 26, 2, -4, "-" + 27, 2, -3, "-" + 28, 2, -2, 
"-" + 29, 2, -1, "-" + 30, 2, 0, "-" + 31, 1, 0, "-" + 32, 1, 0, "" + 33, 2, 0, "" + 34, 3, 0, "" + 35, 3, 1, "" + 36, 3, 2, "" + 37, 3, 3, "" + 38, 4, -2, "" + 39, 4, -1, "" + 40, 4, 0, "" + 41, 5, 0, "" + 42, 6, 0, "" + 43, 1, 0, "*" + 44, 1, 1, "*" + 45, 1, 2, "*" + 46, 1, 3, "*" + 47, 2, -3, "*" + 48, 2, -2, "*" + 49, 2, -1, "*" + 50, 2, 0, "*" + 51, 3, 0, "*" + 52, 3, 1, "*" + 53, 3, 2, "*" + 54, 3, 3, "*" + 55, 3, 4, "*" + 56, 3, 5, "*" + 57, 3, 6, "*" + 58, 3, 7, "*" + 59, 3, 8, "*" + 60, 3, 9, "*" + 61, 4, -9, "*" + 62, 4, -8, "*" + 63, 4, -7, "*" + 64, 4, -6, "*" + 65, 4, -5, "*" + 66, 4, -4, "*" + 67, 4, -3, "*" + 68, 4, -2, "*" + 69, 4, -1, "*" + 70, 4, 0, "*" + 71, 5, 0, "*" 72, 1, 0, d 73, 2, 0, d 74, 3, 0, d From 5e601dc5ae6edf5598cb314b7cb2a1ff1be4829c Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Tue, 10 Mar 2026 15:25:32 +0100 Subject: [PATCH 040/127] Fix table syntax --- README.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.rst b/README.rst index 3db86a5..ef3fcb1 100644 --- a/README.rst +++ b/README.rst @@ -138,6 +138,10 @@ Notes Here is the mapping of coordinates to noncoding positions: +.. raw:: html + +
+ .. csv-table:: Coordinate Mapping :header: "Coordinate", "Position", "Offset", "Region" @@ -222,6 +226,8 @@ Here is the mapping of coordinates to noncoding positions: 78, 7, 0, d 79, 8, 0, d +.. raw:: html +
Coding Class ------------ From 751bc3b49cba7d04efb38889ca323215fa7b5663 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Tue, 10 Mar 2026 15:29:00 +0100 Subject: [PATCH 041/127] Fix table syntax --- README.rst | 119 ++++++++++++++--------------------------------------- 1 file changed, 32 insertions(+), 87 deletions(-) diff --git a/README.rst b/README.rst index ef3fcb1..46f5404 100644 --- a/README.rst +++ b/README.rst @@ -140,94 +140,39 @@ Here is the mapping of coordinates to noncoding positions: .. raw:: html -
+
+ + .. csv-table:: Coordinate Mapping + :header: "Coordinate", "Position", "Offset", "Region" + + 0, 5, 0, u + 1, 4, 0, u + 2, 3, 0, u + 3, 2, 0, u + 4, 1, 0, u + 5, 1, 0, "" + 6, 2, 0, "" + 7, 3, 0, "" + 8, 3, 1, "" + 9, 3, 2, "" + 10, 3, 3, "" + 11, 4, -3, "" + 12, 4, -2, "" + 13, 4, -1, "" + 14, 4, 0, "" + 15, 5, 0, "" + 16, 6, 0, "" + 17, 7, 0, "" + 18, 8, 0, "" + 19, 9, 0, "" + 20, 9, 1, "" + ... + 79, 8, 0, d + + .. raw:: html + +
-.. csv-table:: Coordinate Mapping - :header: "Coordinate", "Position", "Offset", "Region" - - 0, 5, 0, u - 1, 4, 0, u - 2, 3, 0, u - 3, 2, 0, u - 4, 1, 0, u - 5, 1, 0, "" - 6, 2, 0, "" - 7, 3, 0, "" - 8, 3, 1, "" - 9, 3, 2, "" - 10, 3, 3, "" - 11, 4, -3, "" - 12, 4, -2, "" - 13, 4, -1, "" - 14, 4, 0, "" - 15, 5, 0, "" - 16, 6, 0, "" - 17, 7, 0, "" - 18, 8, 0, "" - 19, 9, 0, "" - 20, 9, 1, "" - 21, 9, 2, "" - 22, 9, 3, "" - 23, 9, 4, "" - 24, 9, 5, "" - 25, 10, -5, "" - 26, 10, -4, "" - 27, 10, -3, "" - 28, 10, -2, "" - 29, 10, -1, "" - 30, 10, 0, "" - 31, 11, 0, "" - 32, 12, 0, "" - 33, 13, 0, "" - 34, 14, 0, "" - 35, 14, 1, "" - 36, 14, 2, "" - 37, 14, 3, "" - 38, 15, -2, "" - 39, 15, -1, "" - 40, 15, 0, "" - 41, 16, 0, "" - 42, 17, 0, "" - 43, 18, 0, "" - 44, 18, 1, "" - 45, 18, 2, "" - 46, 18, 3, "" - 47, 19, -3, "" - 48, 19, -2, "" - 49, 19, -1, "" - 50, 19, 0, "" - 51, 20, 0, "" - 52, 20, 1, "" - 53, 20, 2, "" - 54, 20, 3, "" - 55, 20, 4, "" - 56, 20, 5, "" - 57, 20, 6, "" - 58, 20, 7, "" - 59, 20, 8, "" - 60, 20, 9, "" - 61, 21, -9, "" - 62, 21, -8, "" - 63, 21, -7, "" - 64, 21, -6, "" - 65, 21, -5, "" - 66, 21, -4, "" - 67, 21, -3, "" - 68, 21, -2, "" - 69, 21, -1, "" - 70, 21, 0, "" - 71, 22, 0, "" - 72, 1, 0, d - 73, 2, 0, d - 74, 3, 0, d - 75, 4, 0, d - 76, 5, 0, d - 77, 6, 0, d - 78, 7, 0, d - 79, 8, 0, d - -.. raw:: html -
Coding Class ------------ From 9d156c80483a166dcaec27fff965bf51a9873f5d Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Tue, 10 Mar 2026 15:31:27 +0100 Subject: [PATCH 042/127] Fix table indent --- README.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 46f5404..87e3fc9 100644 --- a/README.rst +++ b/README.rst @@ -138,6 +138,8 @@ Notes Here is the mapping of coordinates to noncoding positions: +Here is the mapping of coordinates to noncoding positions: + .. raw:: html
@@ -171,8 +173,7 @@ Here is the mapping of coordinates to noncoding positions: .. raw:: html -
- + Coding Class ------------ From ebbb90a8c72eb44fc7c835859721a59978ee9fbb Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Tue, 10 Mar 2026 15:37:29 +0100 Subject: [PATCH 043/127] Update README.rst --- README.rst | 66 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/README.rst b/README.rst index 87e3fc9..f366eaa 100644 --- a/README.rst +++ b/README.rst @@ -144,34 +144,34 @@ Here is the mapping of coordinates to noncoding positions:
- .. csv-table:: Coordinate Mapping - :header: "Coordinate", "Position", "Offset", "Region" - - 0, 5, 0, u - 1, 4, 0, u - 2, 3, 0, u - 3, 2, 0, u - 4, 1, 0, u - 5, 1, 0, "" - 6, 2, 0, "" - 7, 3, 0, "" - 8, 3, 1, "" - 9, 3, 2, "" - 10, 3, 3, "" - 11, 4, -3, "" - 12, 4, -2, "" - 13, 4, -1, "" - 14, 4, 0, "" - 15, 5, 0, "" - 16, 6, 0, "" - 17, 7, 0, "" - 18, 8, 0, "" - 19, 9, 0, "" - 20, 9, 1, "" - ... - 79, 8, 0, d - - .. raw:: html +.. csv-table:: Coordinate Mapping to Noncoding + :header: "Coordinate", "Position", "Offset", "Region" + + 0, 5, 0, u + 1, 4, 0, u + 2, 3, 0, u + 3, 2, 0, u + 4, 1, 0, u + 5, 1, 0, "" + 6, 2, 0, "" + 7, 3, 0, "" + 8, 3, 1, "" + 9, 3, 2, "" + 10, 3, 3, "" + 11, 4, -3, "" + 12, 4, -2, "" + 13, 4, -1, "" + 14, 4, 0, "" + 15, 5, 0, "" + 16, 6, 0, "" + 17, 7, 0, "" + 18, 8, 0, "" + 19, 9, 0, "" + 20, 9, 1, "" + ... + 79, 8, 0, d + +.. raw:: html
@@ -218,7 +218,11 @@ Notes Here is the mapping of coordinates to coding positions: -.. csv-table:: Coordinate Mapping +.. raw:: html + +
+ +.. csv-table:: Coordinate Mapping to Coding :header: "Coordinate", "Position", "Offset", "Region" 0, 5, 0, u @@ -302,6 +306,10 @@ Here is the mapping of coordinates to coding positions: 78, 7, 0, d 79, 8, 0, d +.. raw:: html + +
+ Protein ------- From 9ffa9bfc2bb558017b0e25e7cec06123fc688f2a Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Tue, 10 Mar 2026 15:41:32 +0100 Subject: [PATCH 044/127] Make scrollable tables in README --- README.rst | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index f366eaa..8d0bc03 100644 --- a/README.rst +++ b/README.rst @@ -138,13 +138,11 @@ Notes Here is the mapping of coordinates to noncoding positions: -Here is the mapping of coordinates to noncoding positions: - .. raw:: html -
+
-.. csv-table:: Coordinate Mapping to Noncoding +.. csv-table:: :header: "Coordinate", "Position", "Offset", "Region" 0, 5, 0, u @@ -220,7 +218,7 @@ Here is the mapping of coordinates to coding positions: .. raw:: html -
+
.. csv-table:: Coordinate Mapping to Coding :header: "Coordinate", "Position", "Offset", "Region" From 90f3d7192d197fb15145e470887d44e356e508f3 Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Tue, 10 Mar 2026 15:54:25 +0100 Subject: [PATCH 045/127] Update README.rst --- README.rst | 178 ++++++++++++++++++++++++++--------------------------- 1 file changed, 88 insertions(+), 90 deletions(-) diff --git a/README.rst b/README.rst index 8d0bc03..3cafd15 100644 --- a/README.rst +++ b/README.rst @@ -216,97 +216,95 @@ Notes Here is the mapping of coordinates to coding positions: -.. raw:: html - -
- -.. csv-table:: Coordinate Mapping to Coding - :header: "Coordinate", "Position", "Offset", "Region" +
+Coordinate Mapping Table + +:: + + Coordinate Position Offset Region + 0 5 0 u + 1 4 0 u + 2 3 0 u + 3 2 0 u + 4 1 0 u + 5 11 0 - + 6 10 0 - + 7 9 0 - + 8 9 1 - + 9 9 2 - + 10 9 3 - + 11 8 -3 - + 12 8 -2 - + 13 8 -1 - + 14 8 0 - + 15 7 0 - + 16 6 0 - + 17 5 0 - + 18 4 0 - + 19 3 0 - + 20 3 1 - + 21 3 2 - + 22 3 3 - + 23 3 4 - + 24 3 5 - + 25 2 -5 - + 26 2 -4 - + 27 2 -3 - + 28 2 -2 - + 29 2 -1 - + 30 2 0 - + 31 1 0 - + 32 1 0 + 33 2 0 + 34 3 0 + 35 3 1 + 36 3 2 + 37 3 3 + 38 4 -2 + 39 4 -1 + 40 4 0 + 41 5 0 + 42 6 0 + 43 1 0 * + 44 1 1 * + 45 1 2 * + 46 1 3 * + 47 2 -3 * + 48 2 -2 * + 49 2 -1 * + 50 2 0 * + 51 3 0 * + 52 3 1 * + 53 3 2 * + 54 3 3 * + 55 3 4 * + 56 3 5 * + 57 3 6 * + 58 3 7 * + 59 3 8 * + 60 3 9 * + 61 4 -9 * + 62 4 -8 * + 63 4 -7 * + 64 4 -6 * + 65 4 -5 * + 66 4 -4 * + 67 4 -3 * + 68 4 -2 * + 69 4 -1 * + 70 4 0 * + 71 5 0 * + 72 1 0 d + 73 2 0 d + 74 3 0 d + 75 4 0 d + 76 5 0 d + 77 6 0 d + 78 7 0 d + 79 8 0 d + +
- 0, 5, 0, u - 1, 4, 0, u - 2, 3, 0, u - 3, 2, 0, u - 4, 1, 0, u - 5, 11, 0, "-" - 6, 10, 0, "-" - 7, 9, 0, "-" - 8, 9, 1, "-" - 9, 9, 2, "-" - 10, 9, 3, "-" - 11, 8, -3, "-" - 12, 8, -2, "-" - 13, 8, -1, "-" - 14, 8, 0, "-" - 15, 7, 0, "-" - 16, 6, 0, "-" - 17, 5, 0, "-" - 18, 4, 0, "-" - 19, 3, 0, "-" - 20, 3, 1, "-" - 21, 3, 2, "-" - 22, 3, 3, "-" - 23, 3, 4, "-" - 24, 3, 5, "-" - 25, 2, -5, "-" - 26, 2, -4, "-" - 27, 2, -3, "-" - 28, 2, -2, "-" - 29, 2, -1, "-" - 30, 2, 0, "-" - 31, 1, 0, "-" - 32, 1, 0, "" - 33, 2, 0, "" - 34, 3, 0, "" - 35, 3, 1, "" - 36, 3, 2, "" - 37, 3, 3, "" - 38, 4, -2, "" - 39, 4, -1, "" - 40, 4, 0, "" - 41, 5, 0, "" - 42, 6, 0, "" - 43, 1, 0, "*" - 44, 1, 1, "*" - 45, 1, 2, "*" - 46, 1, 3, "*" - 47, 2, -3, "*" - 48, 2, -2, "*" - 49, 2, -1, "*" - 50, 2, 0, "*" - 51, 3, 0, "*" - 52, 3, 1, "*" - 53, 3, 2, "*" - 54, 3, 3, "*" - 55, 3, 4, "*" - 56, 3, 5, "*" - 57, 3, 6, "*" - 58, 3, 7, "*" - 59, 3, 8, "*" - 60, 3, 9, "*" - 61, 4, -9, "*" - 62, 4, -8, "*" - 63, 4, -7, "*" - 64, 4, -6, "*" - 65, 4, -5, "*" - 66, 4, -4, "*" - 67, 4, -3, "*" - 68, 4, -2, "*" - 69, 4, -1, "*" - 70, 4, 0, "*" - 71, 5, 0, "*" - 72, 1, 0, d - 73, 2, 0, d - 74, 3, 0, d - 75, 4, 0, d - 76, 5, 0, d - 77, 6, 0, d - 78, 7, 0, d - 79, 8, 0, d - -.. raw:: html - -
Protein ------- From 93f59ca3864ab4850794f5af4696bda52429689e Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Tue, 10 Mar 2026 16:04:14 +0100 Subject: [PATCH 046/127] Update README.rst --- README.rst | 284 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 166 insertions(+), 118 deletions(-) diff --git a/README.rst b/README.rst index 3cafd15..74a7272 100644 --- a/README.rst +++ b/README.rst @@ -138,40 +138,91 @@ Notes Here is the mapping of coordinates to noncoding positions: -.. raw:: html - -
- .. csv-table:: + :class: table-scroll :header: "Coordinate", "Position", "Offset", "Region" - 0, 5, 0, u - 1, 4, 0, u - 2, 3, 0, u - 3, 2, 0, u - 4, 1, 0, u - 5, 1, 0, "" - 6, 2, 0, "" - 7, 3, 0, "" - 8, 3, 1, "" - 9, 3, 2, "" - 10, 3, 3, "" - 11, 4, -3, "" - 12, 4, -2, "" - 13, 4, -1, "" - 14, 4, 0, "" - 15, 5, 0, "" - 16, 6, 0, "" - 17, 7, 0, "" - 18, 8, 0, "" - 19, 9, 0, "" - 20, 9, 1, "" - ... - 79, 8, 0, d - -.. raw:: html + "0", "5","0", "u" + "1", "4","0", "u" + "2", "3","0", "u" + "3", "2","0", "u" + "4", "1","0", "u" + "5", "1","0", "" + "6", "2","0", "" + "7", "3","0", "" + "8", "3","1", "" + "9", "3","2", "" + "10", "3","3", "" + "11", "4","-3", "" + "12", "4","-2", "" + "13", "4","-1", "" + "14", "4","0", "" + "15", "5","0", "" + "16", "6","0", "" + "17", "7","0", "" + "18", "8","0", "" + "19", "9","0", "" + "20", "9","1", "" + "21", "9","2", "" + "22", "9","3", "" + "23", "9","4", "" + "24", "9","5", "" + "25", "10","-5", "" + "26", "10","-4", "" + "27", "10","-3", "" + "28", "10","-2", "" + "29", "10","-1", "" + "30", "10","0", "" + "31", "11","0", "" + "32", "12","0", "" + "33", "13","0", "" + "34", "14","0", "" + "35", "14","1", "" + "36", "14","2", "" + "37", "14","3", "" + "38", "15","-2", "" + "39", "15","-1", "" + "40", "15","0", "" + "41", "16","0", "" + "42", "17","0", "" + "43", "18","0", "" + "44", "18","1", "" + "45", "18","2", "" + "46", "18","3", "" + "47", "19","-3", "" + "48", "19","-2", "" + "49", "19","-1", "" + "50", "19","0", "" + "51", "20","0", "" + "52", "20","1", "" + "53", "20","2", "" + "54", "20","3", "" + "55", "20","4", "" + "56", "20","5", "" + "57", "20","6", "" + "58", "20","7", "" + "59", "20","8", "" + "60", "20","9", "" + "61", "21","-9", "" + "62", "21","-8", "" + "63", "21","-7", "" + "64", "21","-6", "" + "65", "21","-5", "" + "66", "21","-4", "" + "67", "21","-3", "" + "68", "21","-2", "" + "69", "21","-1", "" + "70", "21","0", "" + "71", "22","0", "" + "72", "1","0", "d" + "73", "2","0", "d" + "74", "3","0", 
"d" + "75", "4","0", "d" + "76", "5","0", "d" + "77", "6","0", "d" + "78", "7","0", "d" + "79", "8","0", "d" -
Coding Class ------------ @@ -216,94 +267,91 @@ Notes Here is the mapping of coordinates to coding positions: -
-Coordinate Mapping Table - -:: - - Coordinate Position Offset Region - 0 5 0 u - 1 4 0 u - 2 3 0 u - 3 2 0 u - 4 1 0 u - 5 11 0 - - 6 10 0 - - 7 9 0 - - 8 9 1 - - 9 9 2 - - 10 9 3 - - 11 8 -3 - - 12 8 -2 - - 13 8 -1 - - 14 8 0 - - 15 7 0 - - 16 6 0 - - 17 5 0 - - 18 4 0 - - 19 3 0 - - 20 3 1 - - 21 3 2 - - 22 3 3 - - 23 3 4 - - 24 3 5 - - 25 2 -5 - - 26 2 -4 - - 27 2 -3 - - 28 2 -2 - - 29 2 -1 - - 30 2 0 - - 31 1 0 - - 32 1 0 - 33 2 0 - 34 3 0 - 35 3 1 - 36 3 2 - 37 3 3 - 38 4 -2 - 39 4 -1 - 40 4 0 - 41 5 0 - 42 6 0 - 43 1 0 * - 44 1 1 * - 45 1 2 * - 46 1 3 * - 47 2 -3 * - 48 2 -2 * - 49 2 -1 * - 50 2 0 * - 51 3 0 * - 52 3 1 * - 53 3 2 * - 54 3 3 * - 55 3 4 * - 56 3 5 * - 57 3 6 * - 58 3 7 * - 59 3 8 * - 60 3 9 * - 61 4 -9 * - 62 4 -8 * - 63 4 -7 * - 64 4 -6 * - 65 4 -5 * - 66 4 -4 * - 67 4 -3 * - 68 4 -2 * - 69 4 -1 * - 70 4 0 * - 71 5 0 * - 72 1 0 d - 73 2 0 d - 74 3 0 d - 75 4 0 d - 76 5 0 d - 77 6 0 d - 78 7 0 d - 79 8 0 d - -
+.. csv-table:: My Scrollable Table + :class: table-scroll + :header: "Coordinate", "Position", "Offset", "Region" + + "0", "5","0", "u" + "1", "4","0", "u" + "2", "3","0", "u" + "3", "2","0", "u" + "4", "1","0", "u" + "5", "11","0", "-" + "6", "10","0", "-" + "7", "9","0", "-" + "8", "9","1", "-" + "9", "9","2", "-" + "10", "9","3", "-" + "11", "8","-3", "-" + "12", "8","-2", "-" + "13", "8","-1", "-" + "14", "8","0", "-" + "15", "7","0", "-" + "16", "6","0", "-" + "17", "5","0", "-" + "18", "4","0", "-" + "19", "3","0", "-" + "20", "3","1", "-" + "21", "3","2", "-" + "22", "3","3", "-" + "23", "3","4", "-" + "24", "3","5", "-" + "25", "2","-5", "-" + "26", "2","-4", "-" + "27", "2","-3", "-" + "28", "2","-2", "-" + "29", "2","-1", "-" + "30", "2","0", "-" + "31", "1","0", "-" + "32", "1","0", "" + "33", "2","0", "" + "34", "3","0", "" + "35", "3","1", "" + "36", "3","2", "" + "37", "3","3", "" + "38", "4","-2", "" + "39", "4","-1", "" + "40", "4","0", "" + "41", "5","0", "" + "42", "6","0", "" + "43", "1","0", "*" + "44", "1","1", "*" + "45", "1","2", "*" + "46", "1","3", "*" + "47", "2","-3", "*" + "48", "2","-2", "*" + "49", "2","-1", "*" + "50", "2","0", "*" + "51", "3","0", "*" + "52", "3","1", "*" + "53", "3","2", "*" + "54", "3","3", "*" + "55", "3","4", "*" + "56", "3","5", "*" + "57", "3","6", "*" + "58", "3","7", "*" + "59", "3","8", "*" + "60", "3","9", "*" + "61", "4","-9", "*" + "62", "4","-8", "*" + "63", "4","-7", "*" + "64", "4","-6", "*" + "65", "4","-5", "*" + "66", "4","-4", "*" + "67", "4","-3", "*" + "68", "4","-2", "*" + "69", "4","-1", "*" + "70", "4","0", "*" + "71", "5","0", "*" + "72", "1","0", "d" + "73", "2","0", "d" + "74", "3","0", "d" + "75", "4","0", "d" + "76", "5","0", "d" + "77", "6","0", "d" + "78", "7","0", "d" + "79", "8","0", "d" + Protein From 0ef6003debfe9aa6770cfbe46e41f04c07962fad Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Tue, 10 Mar 2026 16:13:41 +0100 Subject: [PATCH 047/127] Format table --- README.rst | 120 
++++++++++++++++++++++++++--------------------------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/README.rst b/README.rst index 74a7272..ee4ab6f 100644 --- a/README.rst +++ b/README.rst @@ -246,7 +246,7 @@ Where: - **position**: a positive integer - **offset**: an integer indicating the offset relative to the position -- **region**: a string describing the region type (``""`` for standard coding positions, ``"-"`` for 5' UTR, ``"*"`` for 3' UTR, ``"u"`` for upstream and ``"d"`` for downstream) +- **region**: a string describing the region type (`""` for standard coding positions, `'-'` for 5' UTR, `'*'` for 3' UTR, `'u'` for upstream and ``"d"`` for downstream) Coding Position Conversion ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -256,8 +256,8 @@ Coding Position Conversion >>> from mutalyzer_crossmapper import Coding >>> crossmap = Coding(_exons, _cds) >>> crossmap.coordinate_to_coding(31) - {"position": -1, "offset": 0, "region": "-"} - >>> crossmap.coding_to_coordinate({"position": -1, "offset": 0, "region": "-"}) + {"position": -1, "offset": 0, "region": '-'} + >>> crossmap.coding_to_coordinate({"position": -1, "offset": 0, "region": '-'}) 31 Notes @@ -270,39 +270,39 @@ Here is the mapping of coordinates to coding positions: .. 
csv-table:: My Scrollable Table :class: table-scroll :header: "Coordinate", "Position", "Offset", "Region" - + "0", "5","0", "u" "1", "4","0", "u" "2", "3","0", "u" "3", "2","0", "u" "4", "1","0", "u" - "5", "11","0", "-" - "6", "10","0", "-" - "7", "9","0", "-" - "8", "9","1", "-" - "9", "9","2", "-" - "10", "9","3", "-" - "11", "8","-3", "-" - "12", "8","-2", "-" - "13", "8","-1", "-" - "14", "8","0", "-" - "15", "7","0", "-" - "16", "6","0", "-" - "17", "5","0", "-" - "18", "4","0", "-" - "19", "3","0", "-" - "20", "3","1", "-" - "21", "3","2", "-" - "22", "3","3", "-" - "23", "3","4", "-" - "24", "3","5", "-" - "25", "2","-5", "-" - "26", "2","-4", "-" - "27", "2","-3", "-" - "28", "2","-2", "-" - "29", "2","-1", "-" - "30", "2","0", "-" - "31", "1","0", "-" + "5", "11","0", '-' + "6", "10","0", '-' + "7", "9","0", '-' + "8", "9","1", '-' + "9", "9","2", '-' + "10", "9","3", '-' + "11", "8","-3", '-' + "12", "8","-2", '-' + "13", "8","-1", '-' + "14", "8","0", '-' + "15", "7","0", '-' + "16", "6","0", '-' + "17", "5","0", '-' + "18", "4","0", '-' + "19", "3","0", '-' + "20", "3","1", '-' + "21", "3","2", '-' + "22", "3","3", '-' + "23", "3","4", '-' + "24", "3","5", '-' + "25", "2","-5", '-' + "26", "2","-4", '-' + "27", "2","-3", '-' + "28", "2","-2", '-' + "29", "2","-1", '-' + "30", "2","0", '-' + "31", "1","0", '-' "32", "1","0", "" "33", "2","0", "" "34", "3","0", "" @@ -314,35 +314,35 @@ Here is the mapping of coordinates to coding positions: "40", "4","0", "" "41", "5","0", "" "42", "6","0", "" - "43", "1","0", "*" - "44", "1","1", "*" - "45", "1","2", "*" - "46", "1","3", "*" - "47", "2","-3", "*" - "48", "2","-2", "*" - "49", "2","-1", "*" - "50", "2","0", "*" - "51", "3","0", "*" - "52", "3","1", "*" - "53", "3","2", "*" - "54", "3","3", "*" - "55", "3","4", "*" - "56", "3","5", "*" - "57", "3","6", "*" - "58", "3","7", "*" - "59", "3","8", "*" - "60", "3","9", "*" - "61", "4","-9", "*" - "62", "4","-8", "*" - "63", "4","-7", "*" - "64", "4","-6", "*" 
- "65", "4","-5", "*" - "66", "4","-4", "*" - "67", "4","-3", "*" - "68", "4","-2", "*" - "69", "4","-1", "*" - "70", "4","0", "*" - "71", "5","0", "*" + "43", "1","0", '*' + "44", "1","1", '*' + "45", "1","2", '*' + "46", "1","3", '*' + "47", "2","-3", '*' + "48", "2","-2", '*' + "49", "2","-1", '*' + "50", "2","0", '*' + "51", "3","0", '*' + "52", "3","1", '*' + "53", "3","2", '*' + "54", "3","3", '*' + "55", "3","4", '*' + "56", "3","5", '*' + "57", "3","6", '*' + "58", "3","7", '*' + "59", "3","8", '*' + "60", "3","9", '*' + "61", "4","-9", '*' + "62", "4","-8", '*' + "63", "4","-7", '*' + "64", "4","-6", '*' + "65", "4","-5", '*' + "66", "4","-4", '*' + "67", "4","-3", '*' + "68", "4","-2", '*' + "69", "4","-1", '*' + "70", "4","0", '*' + "71", "5","0", '*' "72", "1","0", "d" "73", "2","0", "d" "74", "3","0", "d" From 2704e52de49a55960aa60f6f67fd2f144ffbd292 Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Tue, 10 Mar 2026 16:30:59 +0100 Subject: [PATCH 048/127] Use code block for table --- README.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index ee4ab6f..7406023 100644 --- a/README.rst +++ b/README.rst @@ -138,10 +138,9 @@ Notes Here is the mapping of coordinates to noncoding positions: -.. csv-table:: - :class: table-scroll - :header: "Coordinate", "Position", "Offset", "Region" +.. code-block:: text + Coordinate, Position, Offset, Region "0", "5","0", "u" "1", "4","0", "u" "2", "3","0", "u" @@ -224,6 +223,7 @@ Here is the mapping of coordinates to noncoding positions: "79", "8","0", "d" + Coding Class ------------ @@ -267,7 +267,7 @@ Notes Here is the mapping of coordinates to coding positions: -.. csv-table:: My Scrollable Table +.. 
csv-table:: :class: table-scroll :header: "Coordinate", "Position", "Offset", "Region" From 78183fdd586950fae3f971640ff0c7dd70be04fa Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Tue, 10 Mar 2026 16:39:14 +0100 Subject: [PATCH 049/127] Update README.rst --- README.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 7406023..afeb981 100644 --- a/README.rst +++ b/README.rst @@ -138,9 +138,10 @@ Notes Here is the mapping of coordinates to noncoding positions: -.. code-block:: text +.. csv-table:: + :class: table-scroll + :header: "Coordinate", "Position", "Offset", "Region" - Coordinate, Position, Offset, Region "0", "5","0", "u" "1", "4","0", "u" "2", "3","0", "u" From 03309a8ac928bfa24c2c35b6318a75407a8cf199 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Tue, 10 Mar 2026 22:11:18 +0100 Subject: [PATCH 050/127] Refactor: crossmapper and tests for degenerate option --- mutalyzer_crossmapper/crossmapper.py | 68 ++-- tests/test_crossmapper.py | 571 ++++++++++++++++----------- 2 files changed, 377 insertions(+), 262 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 9762598..5bc9f5c 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -69,46 +69,49 @@ def __init__(self, locations, cds, inverted=False): b0 = self._noncoding.to_position(cds[0]) b1 = self._noncoding.to_position(cds[1]) + e0 = self._noncoding.to_position(locations[0][0]) + e1 = self._noncoding.to_position(locations[-1][1]-1) if self._inverted: self._coding = (b1["position"] + b1["offset"] + 1, b0["position"] + b0["offset"] + 1) - self._cds_len = (b0["position"] + b0["offset"]) - (b1["position"] + b1["offset"]) + self._cds_len = abs((b0["position"] + b0["offset"]) - (b1["position"] + b1["offset"])) + self._exons_end = e0["position"] else: self._coding = (b0["position"] + b0["offset"], b1["position"] + 
b1["offset"]) - self._cds_len = (b1["position"] + b1["offset"]) - (b0["position"] + b0["offset"]) + self._cds_len = abs((b1["position"] + b1["offset"]) - (b0["position"] + b0["offset"])) + self._exons_end = e1["position"] def _coordinate_to_coding(self, coordinate): """Convert a coordinate to a coding position (c./r.). :arg int coordinate: Coordinate. - :returns tuple: Coding position (c./r.). + :returns dict: Coding position model (c./r.). """ - noncoding_pos = self._noncoding.to_position(coordinate) + noncoding_pos_m = self._noncoding.to_position(coordinate) - # on top of the noncoding position model, add CDs info - location = noncoding_pos["position"] - if noncoding_pos["region"] == "": - if location < self._coding[0]: # before CDs + location = noncoding_pos_m["position"] + if noncoding_pos_m["region"] == "": + if location < self._coding[0]: return { "position": self._coding[0] - location, - "offset": noncoding_pos["offset"], + "offset": noncoding_pos_m["offset"], "region": "-" } - elif location >= self._coding[1]: # after CDs + elif location >= self._coding[1]: return { "position": location - self._coding[1] + 1, - "offset": noncoding_pos["offset"], + "offset": noncoding_pos_m["offset"], "region": "*" } else: return { "position": location - self._coding[0] + 1, - "offset": noncoding_pos["offset"], + "offset": noncoding_pos_m["offset"], "region": "" } else: - return noncoding_pos + return noncoding_pos_m def coordinate_to_coding(self, coordinate, degenerate=False): """Convert a coordinate to a coding position (c./r.). @@ -116,57 +119,58 @@ def coordinate_to_coding(self, coordinate, degenerate=False): :arg int coordinate: Coordinate. :arg bool degenerate: Return a degenerate position. - :returns tuple: Coding position (c./r.). + :returns dict: Coding position model (c./r.). 
""" - pos = self._coordinate_to_coding(coordinate) - if degenerate and pos["region"] in ["u", "d"]: - if pos["region"] == "u": - pos["position"] = pos["position"] + self._coding[0] - pos["region"] = "-" - else: - pos["position"] = pos["position"] + self._coding[1] - pos["region"] = "*" - return pos + pos_m = self._coordinate_to_coding(coordinate) + if degenerate: + if pos_m["region"] == "u": + pos_m["position"] = pos_m["position"] + self._coding[0] + pos_m["region"] = "-" + if pos_m["region"] == "d": + pos_m["position"] = pos_m["position"] + self._exons_end - self._coding[1] + 1 + pos_m["region"] = "*" + return pos_m def coding_to_coordinate(self, pos_m): """Convert a coding position (c./r.) to a coordinate. - :arg tuple position: Coding position (c./r.). + :arg dict pos_m: Coding position model (c./r.). :returns int: Coordinate. """ region = pos_m["region"] if region == "u": - noncoding_pos = { - "position": abs(pos_m["position"]) + pos_m["offset"], + noncoding_pos_m = { + "position": pos_m["position"] - pos_m["offset"], "offset": 0, "region": "u" } elif region == "d": - noncoding_pos = { - "position": abs(pos_m["position"]) + pos_m["offset"], + noncoding_pos_m = { + "position": pos_m["position"] + pos_m["offset"], "offset": 0, "region": "d" } elif region == "": - noncoding_pos = { + noncoding_pos_m = { "position": pos_m["position"] + self._coding[0] -1, "offset": pos_m["offset"], "region": "" } + # add checks for degenerate results? 
elif region == "-": - noncoding_pos = { + noncoding_pos_m = { "position": self._coding[0] - pos_m["position"], "offset": pos_m["offset"], "region": "" } else: # * - noncoding_pos = { + noncoding_pos_m = { "position": self._coding[1] + pos_m["position"] - 1, "offset": pos_m["offset"], "region": "" } - return self._noncoding.to_coordinate(noncoding_pos) + return self._noncoding.to_coordinate(noncoding_pos_m) def coordinate_to_protein(self, coordinate): diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index 6ba2d83..dbaf87b 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -11,9 +11,17 @@ def test_Genomic(): crossmap = Genomic() invariant( - crossmap.coordinate_to_genomic, 0, crossmap.genomic_to_coordinate, {"position": 1}) + crossmap.coordinate_to_genomic, + 0, + crossmap.genomic_to_coordinate, + {"position": 1}, + ) invariant( - crossmap.coordinate_to_genomic, 98, crossmap.genomic_to_coordinate, {"position": 99}) + crossmap.coordinate_to_genomic, + 98, + crossmap.genomic_to_coordinate, + {"position": 99}, + ) def test_NonCoding(): @@ -22,19 +30,31 @@ def test_NonCoding(): # Boundary between upstream and transcript. invariant( - crossmap.coordinate_to_noncoding, 4, - crossmap.noncoding_to_coordinate, {"position": 1, "offset": 0, "region":"u"}) + crossmap.coordinate_to_noncoding, + 4, + crossmap.noncoding_to_coordinate, + {"position": 1, "offset": 0, "region": "u"}, + ) invariant( - crossmap.coordinate_to_noncoding, 5, - crossmap.noncoding_to_coordinate, {"position": 1, "offset": 0, "region": ""}) + crossmap.coordinate_to_noncoding, + 5, + crossmap.noncoding_to_coordinate, + {"position": 1, "offset": 0, "region": ""}, + ) # Boundary between downstream and transcript. 
invariant( - crossmap.coordinate_to_noncoding, 71, - crossmap.noncoding_to_coordinate, {"position": 22, "offset": 0, "region": ""}) + crossmap.coordinate_to_noncoding, + 71, + crossmap.noncoding_to_coordinate, + {"position": 22, "offset": 0, "region": ""}, + ) invariant( - crossmap.coordinate_to_noncoding, 72, - crossmap.noncoding_to_coordinate, {"position": 1, "offset": 0, "region": "d"}) + crossmap.coordinate_to_noncoding, + 72, + crossmap.noncoding_to_coordinate, + {"position": 1, "offset": 0, "region": "d"}, + ) def test_NonCoding_inverted(): @@ -43,19 +63,31 @@ def test_NonCoding_inverted(): # Boundary between upstream and transcript. invariant( - crossmap.coordinate_to_noncoding, 72, - crossmap.noncoding_to_coordinate, {"position": 1, "offset": 0, "region": "u"}) + crossmap.coordinate_to_noncoding, + 72, + crossmap.noncoding_to_coordinate, + {"position": 1, "offset": 0, "region": "u"}, + ) invariant( - crossmap.coordinate_to_noncoding, 71, - crossmap.noncoding_to_coordinate, {"position": 1, "offset": 0, "region": ""}) + crossmap.coordinate_to_noncoding, + 71, + crossmap.noncoding_to_coordinate, + {"position": 1, "offset": 0, "region": ""}, + ) # Boundary between downstream and transcript. invariant( - crossmap.coordinate_to_noncoding, 5, - crossmap.noncoding_to_coordinate, {"position": 22, "offset": 0, "region": ""}) + crossmap.coordinate_to_noncoding, + 5, + crossmap.noncoding_to_coordinate, + {"position": 22, "offset": 0, "region": ""}, + ) invariant( - crossmap.coordinate_to_noncoding, 4, - crossmap.noncoding_to_coordinate, {"position": 1, "offset": 0, "region": "d"}) + crossmap.coordinate_to_noncoding, + 4, + crossmap.noncoding_to_coordinate, + {"position": 1, "offset": 0, "region": "d"}, + ) def test_NonCoding_degenerate(): @@ -64,19 +96,23 @@ def test_NonCoding_degenerate(): # Boundary between upstream and transcript. 
degenerate_equal( - crossmap.noncoding_to_coordinate, 4, + crossmap.noncoding_to_coordinate, + 4, [ - {"position": 1, "offset": 0, "region":"u"}, - {"position": 0, "offset": -1, "region":"u"} - ]) + {"position": 1, "offset": 0, "region": "u"}, + {"position": 0, "offset": -1, "region": "u"}, + ], + ) # Boundary between downstream and transcript. degenerate_equal( - crossmap.noncoding_to_coordinate, 72, + crossmap.noncoding_to_coordinate, + 72, [ {"position": 1, "offset": 0, "region": "d"}, - {"position": 0, "offset": 1, "region": "d"} - ]) + {"position": 0, "offset": 1, "region": "d"}, + ], + ) def test_NonCoding_inverted_degenerate(): @@ -85,54 +121,50 @@ def test_NonCoding_inverted_degenerate(): # Boundary between upstream and transcript. degenerate_equal( - crossmap.noncoding_to_coordinate, 72, - [{"position": 1, "offset": 0, "region": "u"}]) + crossmap.noncoding_to_coordinate, + 72, + [{"position": 1, "offset": 0, "region": "u"}], + ) # Boundary between downstream and transcript. degenerate_equal( - crossmap.noncoding_to_coordinate, 4, - [{"position": 1 , "offset": 0, "region": "d"}]) + crossmap.noncoding_to_coordinate, + 4, + [{"position": 1, "offset": 0, "region": "d"}], + ) + -_exons = [(5, 8), (14, 20), (30, 35), (40, 44), (50, 52), (70, 72)] -_cds = (32, 43) def test_Coding(): """Forward oriented coding transcript.""" crossmap = Coding(_exons, _cds) # Boundary between 5' and CDS. invariant( - crossmap.coordinate_to_coding, 31, + crossmap.coordinate_to_coding, + 31, crossmap.coding_to_coordinate, - {"position": 1, - "offset":0, - "region":"-" - } + {"position": 1, "offset": 0, "region": "-"}, ) invariant( - crossmap.coordinate_to_coding, 32, + crossmap.coordinate_to_coding, + 32, crossmap.coding_to_coordinate, - {"position": 1, - "offset":0, - "region":"" - } - ) + {"position": 1, "offset": 0, "region": ""}, + ) # Boundary between CDS and 3'. 
invariant( - crossmap.coordinate_to_coding, 42, + crossmap.coordinate_to_coding, + 42, crossmap.coding_to_coordinate, - {"position": 6, - "offset":0, - "region":"" - }) + {"position": 6, "offset": 0, "region": ""}, + ) invariant( - crossmap.coordinate_to_coding, 43, + crossmap.coordinate_to_coding, + 43, crossmap.coding_to_coordinate, - {"position": 1, - "offset":0, - "region":"*" - } - ) + {"position": 1, "offset": 0, "region": "*"}, + ) def test_Coding_inverted(): @@ -141,42 +173,30 @@ def test_Coding_inverted(): # Boundary between 5' and CDS. invariant( - crossmap.coordinate_to_coding, 43, + crossmap.coordinate_to_coding, + 43, crossmap.coding_to_coordinate, - { - "position": 1, - "offset": 0, - "region": "-" - } + {"position": 1, "offset": 0, "region": "-"}, ) invariant( - crossmap.coordinate_to_coding, 42, + crossmap.coordinate_to_coding, + 42, crossmap.coding_to_coordinate, - { - "position": 1, - "offset": 0, - "region": "" - } - ) + {"position": 1, "offset": 0, "region": ""}, + ) # Boundary between CDS and 3'. invariant( - crossmap.coordinate_to_coding, 32, + crossmap.coordinate_to_coding, + 32, crossmap.coding_to_coordinate, - { - "position": 6, - "offset": 0, - "region": "" - } - ) + {"position": 6, "offset": 0, "region": ""}, + ) invariant( - crossmap.coordinate_to_coding, 31, + crossmap.coordinate_to_coding, + 31, crossmap.coding_to_coordinate, - { - "position": 1, - "offset": 0, - "region": "*" - } + {"position": 1, "offset": 0, "region": "*"}, ) @@ -186,23 +206,31 @@ def test_Coding_regions(): # Upstream odd length intron between two regions. 
invariant( - crossmap.coordinate_to_coding, 25, + crossmap.coordinate_to_coding, + 25, crossmap.coding_to_coordinate, - {'position': 1, 'offset': 5, 'region': '-'}) + {"position": 1, "offset": 5, "region": "-"}, + ) invariant( - crossmap.coordinate_to_coding, 26, + crossmap.coordinate_to_coding, + 26, crossmap.coding_to_coordinate, - {'position': 1, 'offset': -4, 'region': ''}) + {"position": 1, "offset": -4, "region": ""}, + ) # Downstream odd length intron between two regions. invariant( - crossmap.coordinate_to_coding, 44, + crossmap.coordinate_to_coding, + 44, crossmap.coding_to_coordinate, - {'position': 10, 'offset': 5, 'region': ''}) + {"position": 10, "offset": 5, "region": ""}, + ) invariant( - crossmap.coordinate_to_coding, 45, + crossmap.coordinate_to_coding, + 45, crossmap.coding_to_coordinate, - {'position': 1, 'offset': -4, 'region': '*'}) + {"position": 1, "offset": -4, "region": "*"}, + ) def test_Coding_regions_inverted(): @@ -211,23 +239,31 @@ def test_Coding_regions_inverted(): # Upstream odd length intron between two regions. invariant( - crossmap.coordinate_to_coding, 44, + crossmap.coordinate_to_coding, + 44, crossmap.coding_to_coordinate, - {'position': 1, 'offset': 5, 'region': '-'}) + {"position": 1, "offset": 5, "region": "-"}, + ) invariant( - crossmap.coordinate_to_coding, 43, + crossmap.coordinate_to_coding, + 43, crossmap.coding_to_coordinate, - {'position': 1, 'offset': -4, 'region': ''}) + {"position": 1, "offset": -4, "region": ""}, + ) # Downstream odd length intron between two regions. 
invariant( - crossmap.coordinate_to_coding, 25, + crossmap.coordinate_to_coding, + 25, crossmap.coding_to_coordinate, - {'position': 10, 'offset': 5, 'region': ''}) + {"position": 10, "offset": 5, "region": ""}, + ) invariant( - crossmap.coordinate_to_coding, 24, + crossmap.coordinate_to_coding, + 24, crossmap.coding_to_coordinate, - {'position': 1, 'offset': -4, 'region': '*'}) + {"position": 1, "offset": -4, "region": "*"}, + ) def test_Coding_no_utr5(): @@ -236,13 +272,17 @@ def test_Coding_no_utr5(): # Direct transition from upstream to CDS. invariant( - crossmap.coordinate_to_coding, 9, + crossmap.coordinate_to_coding, + 9, crossmap.coding_to_coordinate, - {'position': 1, 'offset': 0, 'region': 'u'}) # serialize result : u1 + {"position": 1, "offset": 0, "region": "u"}, + ) invariant( - crossmap.coordinate_to_coding, 10, + crossmap.coordinate_to_coding, + 10, crossmap.coding_to_coordinate, - {'position': 1, 'offset': 0, 'region': ''}) # serialize result: 1 + {"position": 1, "offset": 0, "region": ""}, + ) def test_Coding_no_utr5_inverted(): @@ -251,13 +291,17 @@ def test_Coding_no_utr5_inverted(): # Direct transition from upstream to CDS. invariant( - crossmap.coordinate_to_coding, 20, + crossmap.coordinate_to_coding, + 20, crossmap.coding_to_coordinate, - {'position': 1, 'offset': 0, 'region': 'u'}) + {"position": 1, "offset": 0, "region": "u"}, + ) invariant( - crossmap.coordinate_to_coding, 19, + crossmap.coordinate_to_coding, + 19, crossmap.coding_to_coordinate, - {'position': 2, 'offset': 0, 'region': '-'}) + {"position": 2, "offset": 0, "region": "-"}, + ) def test_Coding_no_utr3(): @@ -265,15 +309,18 @@ def test_Coding_no_utr3(): crossmap = Coding([(10, 20)], (15, 20)) # Direct transition from CDS to downstream. 
- #TODO: invariant( - crossmap.coordinate_to_coding, 19, + crossmap.coordinate_to_coding, + 19, crossmap.coding_to_coordinate, - {'position': 9, 'offset': 0, 'region': '*'}) + {"position": 9, "offset": 0, "region": "*"}, + ) invariant( - crossmap.coordinate_to_coding, 20, + crossmap.coordinate_to_coding, + 20, crossmap.coding_to_coordinate, - {'position': 1, 'offset': 0, 'region': 'd'}) + {"position": 1, "offset": 0, "region": "d"}, + ) def test_Coding_no_utr3_inverted(): @@ -282,13 +329,17 @@ def test_Coding_no_utr3_inverted(): # Direct transition from CDS to downstream. invariant( - crossmap.coordinate_to_coding, 10, + crossmap.coordinate_to_coding, + 10, crossmap.coding_to_coordinate, - {'position': 5, 'offset': 0, 'region': ''}) + {"position": 5, "offset": 0, "region": ""}, + ) invariant( - crossmap.coordinate_to_coding, 9, + crossmap.coordinate_to_coding, + 9, crossmap.coding_to_coordinate, - {'position': 1, 'offset': 0, 'region': 'd'}) + {"position": 1, "offset": 0, "region": "d"}, + ) def test_Coding_small_utr5(): @@ -297,18 +348,23 @@ def test_Coding_small_utr5(): # Transition from upstream to 5' UTR to CDS. invariant( - crossmap.coordinate_to_coding, 9, + crossmap.coordinate_to_coding, + 9, crossmap.coding_to_coordinate, - {'position': 1, 'offset': 0, 'region': 'u'}) + {"position": 1, "offset": 0, "region": "u"}, + ) invariant( - crossmap.coordinate_to_coding, 10, + crossmap.coordinate_to_coding, + 10, crossmap.coding_to_coordinate, - {'position': 1, 'offset': 0, 'region': '-'} + {"position": 1, "offset": 0, "region": "-"}, ) invariant( - crossmap.coordinate_to_coding, 11, + crossmap.coordinate_to_coding, + 11, crossmap.coding_to_coordinate, - {'position': 1, 'offset': 0, 'region': ''}) + {"position": 1, "offset": 0, "region": ""}, + ) def test_Coding_small_utr5_inverted(): @@ -317,17 +373,23 @@ def test_Coding_small_utr5_inverted(): # Transition from upstream to 5' UTR to CDS. 
invariant( - crossmap.coordinate_to_coding, 20, - crossmap.coding_to_coordinate,# (-1, -1, -1, -1) - {'position': 1, 'offset': 0, 'region': 'u'}) + crossmap.coordinate_to_coding, + 20, + crossmap.coding_to_coordinate, + {"position": 1, "offset": 0, "region": "u"}, + ) invariant( - crossmap.coordinate_to_coding, 19, - crossmap.coding_to_coordinate, #(-1, 0, -1, 0)) - {'position': 1, 'offset': 0, 'region': '-'}) + crossmap.coordinate_to_coding, + 19, + crossmap.coding_to_coordinate, + {"position": 1, "offset": 0, "region": "-"}, + ) invariant( - crossmap.coordinate_to_coding, 18, + crossmap.coordinate_to_coding, + 18, crossmap.coding_to_coordinate, - {'position': 1, 'offset': 0, 'region': ''}) + {"position": 1, "offset": 0, "region": ""}, + ) def test_Coding_small_utr3(): @@ -336,17 +398,23 @@ def test_Coding_small_utr3(): # Transition from CDS to 3' UTR to downstream. invariant( - crossmap.coordinate_to_coding, 18, + crossmap.coordinate_to_coding, + 18, crossmap.coding_to_coordinate, - {'position': 4, 'offset': 0, 'region': ''}) + {"position": 4, "offset": 0, "region": ""}, + ) invariant( - crossmap.coordinate_to_coding, 19, - crossmap.coding_to_coordinate, #(1, 0, 1, 0) - {'position': 1, 'offset': 0, 'region': '*'}) + crossmap.coordinate_to_coding, + 19, + crossmap.coding_to_coordinate, + {"position": 1, "offset": 0, "region": "*"}, + ) invariant( - crossmap.coordinate_to_coding, 20, - crossmap.coding_to_coordinate, #(1, 1, 1, 1)) - {'position': 1, 'offset': 0, 'region': 'd'}) + crossmap.coordinate_to_coding, + 20, + crossmap.coding_to_coordinate, + {"position": 1, "offset": 0, "region": "d"}, + ) def test_Coding_small_utr3_inverted(): @@ -355,17 +423,23 @@ def test_Coding_small_utr3_inverted(): # Transition from CDS to 3' UTR to downstream. 
invariant( - crossmap.coordinate_to_coding, 11, + crossmap.coordinate_to_coding, + 11, crossmap.coding_to_coordinate, - {'position': 4, 'offset': 0, 'region': ''}) + {"position": 4, "offset": 0, "region": ""}, + ) invariant( - crossmap.coordinate_to_coding, 10, + crossmap.coordinate_to_coding, + 10, crossmap.coding_to_coordinate, - {'position': 1, 'offset': 0, 'region': '*'}) + {"position": 1, "offset": 0, "region": "*"}, + ) invariant( - crossmap.coordinate_to_coding, 9, + crossmap.coordinate_to_coding, + 9, crossmap.coding_to_coordinate, - {'position': 1, 'offset': 0, 'region': 'd'}) + {"position": 1, "offset": 0, "region": "d"}, + ) def test_Coding_degenerate(): @@ -373,57 +447,45 @@ def test_Coding_degenerate(): crossmap = Coding([(10, 20)], (11, 19)) degenerate_equal( - crossmap.coding_to_coordinate, 9, + crossmap.coding_to_coordinate, + 9, [ - {'position': 1, 'offset': 0, 'region': 'u'}, - {'position': 2, 'offset': 1, 'region': 'u'}, - {'position': 0, 'offset': -1, 'region': 'u'}, - {'position': 1, 'offset': -1, 'region': '-'}, - {'position': 2, 'offset': 0, 'region': '-'}, - {'position': 1, 'offset': -2, 'region': ''}, - ]) + {"position": 1, "offset": 0, "region": "u"}, + {"position": 2, "offset": 1, "region": "u"}, + {"position": 0, "offset": -1, "region": "u"}, + ], + ) degenerate_equal( - crossmap.coding_to_coordinate, 20, + crossmap.coding_to_coordinate, + 20, [ - {'position': 1, 'offset': 0, 'region': 'd'}, - {'position': 8, 'offset': -7, 'region': 'd'}, - {'position': 0, 'offset': -1, 'region': 'd'}, - {'position': 2, 'offset': 0, 'region': '*'}, - {'position': 1, 'offset': 1, 'region': '*'}, - {'position': 8, 'offset': 2, 'region': ''}, - ] + {"position": 1, "offset": 0, "region": "d"}, + {"position": 8, "offset": -7, "region": "d"}, + {"position": 0, "offset": -1, "region": "d"}, + ], ) -#TODO: Add tests for silently degenerate, -# position value <= 0 -# offset value > intron length - - def test_Coding_inverted_degenerate(): """Degenerate upstream 
and downstream positions are silently corrected.""" crossmap = Coding([(10, 20)], (11, 19), True) degenerate_equal( - crossmap.coding_to_coordinate, 20, + crossmap.coding_to_coordinate, + 20, [ - {'position': 1, 'offset': 0, 'region': 'u'}, - {'position': 2, 'offset': 1, 'region': 'u'}, - {'position': 0, 'offset': -1, 'region': 'u'}, - {'position': 1, 'offset': -2, 'region': ''}, - {'position': 1, 'offset': -1, 'region': '-'}, - {'position': 2, 'offset': 0, 'region': '-'} - ] + {"position": 1, "offset": 0, "region": "u"}, + {"position": 2, "offset": 1, "region": "u"}, + {"position": 0, "offset": -1, "region": "u"}, + ], ) degenerate_equal( - crossmap.coding_to_coordinate, 9, + crossmap.coding_to_coordinate, + 9, [ - {'position': 1, 'offset': 0, 'region': 'd'}, - {'position': 2, 'offset': -1, 'region': 'd'}, - {'position': 1, 'offset': 1, 'region': '*'}, - {'position': 1, 'offset': 1, 'region': '*'}, - {'position': 10, 'offset': 0, 'region': ''}, - ] + {"position": 1, "offset": 0, "region": "d"}, + {"position": 2, "offset": -1, "region": "d"}, + ], ) @@ -431,37 +493,50 @@ def test_Coding_degenerate_return(): """Degenerate upstream and downstream positions may be returned.""" crossmap = Coding([(10, 20)], (11, 19)) - for i in range(0, 30): - print(i, crossmap.coordinate_to_coding(i), crossmap.coordinate_to_coding(i, True)) - - assert crossmap.coordinate_to_coding(9, True) == {'position': 2, 'offset': 0, 'region': '-'} - assert crossmap.coordinate_to_coding(20, True) == {'position': 2, 'offset': 0, 'region': '*'} + assert crossmap.coordinate_to_coding(9, True) == { + "position": 2, + "offset": 0, + "region": "-", + } + assert crossmap.coordinate_to_coding(20, True) == { + "position": 2, + "offset": 0, + "region": "*", + } def test_Coding_inverted_degenerate_return(): """Degenerate upstream and downstream positions may be returned.""" crossmap = Coding([(10, 20)], (11, 19), True) for i in range(0, 30): - print(i, crossmap.coordinate_to_coding(i), 
crossmap.coordinate_to_coding(i, True)) + print( + i, crossmap.coordinate_to_coding(i), crossmap.coordinate_to_coding(i, True) + ) - assert crossmap.coordinate_to_coding(20, True) == {'position': 2, 'offset': 0, 'region': '-'} - assert crossmap.coordinate_to_coding(9, True) == {'position': 2, 'offset': 0, 'region': '*'} + assert crossmap.coordinate_to_coding(20, True) == { + "position": 2, + "offset": 0, + "region": "-", + } + assert crossmap.coordinate_to_coding(9, True) == { + "position": 2, + "offset": 0, + "region": "*", + } def test_Coding_degenerate_no_return(): """Degenerate internal positions do not exist.""" crossmap = Coding([(10, 20), (30, 40)], (10, 40)) - assert (crossmap.coordinate_to_coding(25) == - crossmap.coordinate_to_coding(25, True)) + assert crossmap.coordinate_to_coding(25) == crossmap.coordinate_to_coding(25, True) def test_Coding_inverted_degenerate_no_return(): """Degenerate internal positions do not exist.""" crossmap = Coding([(10, 20), (30, 40)], (10, 40), True) - assert (crossmap.coordinate_to_coding(25) == - crossmap.coordinate_to_coding(25, True)) + assert crossmap.coordinate_to_coding(25) == crossmap.coordinate_to_coding(25, True) def test_Coding_no_utr_degenerate(): @@ -469,45 +544,45 @@ def test_Coding_no_utr_degenerate(): crossmap = Coding([(10, 11)], (10, 11)) degenerate_equal( - crossmap.coding_to_coordinate, 9, + crossmap.coding_to_coordinate, + 9, [ - {'position': 1, 'offset': 0, 'region': '-'}, - {'position': 1, 'offset': 0, 'region': 'u'}, - {'position': 1, 'offset': -1, 'region': ''}, - ] + {"position": 2, "offset": 1, "region": "u"}, + {"position": 1, "offset": 0, "region": "u"}, + ], ) degenerate_equal( - crossmap.coding_to_coordinate, 11, + crossmap.coding_to_coordinate, + 11, [ - {'position': 1, 'offset': 0, 'region': '*'}, - {'position': 1, 'offset': 0, 'region': 'd'}, - {'position': 1, 'offset': 1, 'region': ''} - ] + {"position": 1, "offset": 0, "region": "d"}, + {"position": 2, "offset": -1, "region": "d"}, + ], ) 
+ def test_Coding_inverted_no_utr_degenerate(): """UTRs may be missing.""" crossmap = Coding([(10, 11)], (10, 11), True) - # [(1, -1, 0, -1), (-1, 0, -1, -1), (1, -2, 1, -1)]) degenerate_equal( - crossmap.coding_to_coordinate, 11, + crossmap.coding_to_coordinate, + 11, [ - {'position': 1, 'offset': 0, 'region': 'u'}, - {'position': 2, 'offset': 1, 'region': 'u'}, - {'position': 1, 'offset': 0, 'region': '-'}, - {'position': 1, 'offset': 0, 'region': '*'}, - ] -) + {"position": 1, "offset": 0, "region": "u"}, + {"position": 2, "offset": 1, "region": "u"}, + ], + ) degenerate_equal( - crossmap.coding_to_coordinate, 9, + crossmap.coding_to_coordinate, + 9, [ - {'position': 1, 'offset': 0, 'region': 'd'}, - {'position': 1, 'offset': 0, 'region': '*'}, - {'position': 1, 'offset': -1, 'region': ''}, - ] + {"position": 1, "offset": 0, "region": "d"}, + {"position": 2, "offset": -1, "region": "d"}, + ], ) + def test_Coding_no_utr_degenerate_return(): """UTRs may be missing.""" crossmap = Coding([(10, 11)], (10, 11)) @@ -515,50 +590,86 @@ def test_Coding_no_utr_degenerate_return(): print(crossmap.coordinate_to_coding(11), crossmap.coordinate_to_coding(11, True)) print(crossmap.coordinate_to_coding(12), crossmap.coordinate_to_coding(12, True)) - assert crossmap.coordinate_to_coding(8, True) == {'position': 2, 'offset': 0, 'region': '-'}#(-2, 0, -1, -2) - assert crossmap.coordinate_to_coding(9, True) == {'position': 1, 'offset': 0, 'region': '-'}#(-1, 0, -1, -1) - assert crossmap.coordinate_to_coding(11, True) == {'position': 1, 'offset': 0, 'region': '*'}#(1, 0, 1, 1) - assert crossmap.coordinate_to_coding(12, True) == {'position': 2, 'offset': 0, 'region': '*'}#(2, 0, 1, 2) + assert crossmap.coordinate_to_coding(8, True) == { + "position": 2, + "offset": 0, + "region": "-", + } + assert crossmap.coordinate_to_coding(9, True) == { + "position": 1, + "offset": 0, + "region": "-", + } + assert crossmap.coordinate_to_coding(11, True) == { + "position": 1, + "offset": 0, + 
"region": "*", + } + assert crossmap.coordinate_to_coding(12, True) == { + "position": 2, + "offset": 0, + "region": "*", + } def test_Coding_inverted_no_utr_degenerate_return(): """UTRs may be missing.""" crossmap = Coding([(10, 11)], (10, 11), True) - assert crossmap.coordinate_to_coding(11, True) == (-1, 0, -1, -1) - assert crossmap.coordinate_to_coding(9, True) == (1, 0, 1, 1) + assert crossmap.coordinate_to_coding(11, True) == { + "position": 3, + "offset": 0, + "region": "-", + } + assert crossmap.coordinate_to_coding(9, True) == { + "position": 1, + "offset": 0, + "region": "*", + } def test_Coding_protein(): """Protein positions.""" crossmap = Coding(_exons, _cds) - # Boundary between 5' UTR and CDS. + # Boundary between 5' UTR and CDS invariant( - crossmap.coordinate_to_protein, 31, + crossmap.coordinate_to_protein, + 31, crossmap.protein_to_coordinate, - {'position': 1, "position_in_codon": 1, 'offset': 0, 'region': '-'}) + {"position": 1, "position_in_codon": 1, "offset": 0, "region": "-"}, + ) invariant( - crossmap.coordinate_to_protein, 32, + crossmap.coordinate_to_protein, + 32, crossmap.protein_to_coordinate, - {'position': 1, "position_in_codon": 1, 'offset': 0, 'region': ''}) + {"position": 1, "position_in_codon": 1, "offset": 0, "region": ""}, + ) # Intron boundary. invariant( - crossmap.coordinate_to_protein, 34, + crossmap.coordinate_to_protein, + 34, crossmap.protein_to_coordinate, - {'position': 1, "position_in_codon": 3, 'offset': 0, 'region': ''}) + {"position": 1, "position_in_codon": 3, "offset": 0, "region": ""}, + ) invariant( - crossmap.coordinate_to_protein, 35, + crossmap.coordinate_to_protein, + 35, crossmap.protein_to_coordinate, - {'position': 1, "position_in_codon": 3, 'offset': 1, 'region': ''}) + {"position": 1, "position_in_codon": 3, "offset": 1, "region": ""}, + ) # Boundary between CDS and 3' UTR. 
invariant( - crossmap.coordinate_to_protein, 42, + crossmap.coordinate_to_protein, + 42, crossmap.protein_to_coordinate, - {'position': 2, "position_in_codon": 3, 'offset': 0, 'region': ''}) + {"position": 2, "position_in_codon": 3, "offset": 0, "region": ""}, + ) invariant( - crossmap.coordinate_to_protein, 43, + crossmap.coordinate_to_protein, + 43, crossmap.protein_to_coordinate, - {'position': 1, "position_in_codon": 1, 'offset': 0, 'region': '*'}) + {"position": 1, "position_in_codon": 1, "offset": 0, "region": "*"}, + ) From fccc27555324864800126ea59c7b178ada24b68c Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Wed, 11 Mar 2026 08:53:11 +0100 Subject: [PATCH 051/127] Add support for degenerate position model as input --- mutalyzer_crossmapper/crossmapper.py | 55 +++++++++++++++++++--------- tests/test_crossmapper.py | 13 +++---- 2 files changed, 44 insertions(+), 24 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 5bc9f5c..65b07d9 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -68,18 +68,20 @@ def __init__(self, locations, cds, inverted=False): NonCoding.__init__(self, locations, inverted) b0 = self._noncoding.to_position(cds[0]) - b1 = self._noncoding.to_position(cds[1]) + b1 = self._noncoding.to_position(cds[1]-1) e0 = self._noncoding.to_position(locations[0][0]) e1 = self._noncoding.to_position(locations[-1][1]-1) if self._inverted: - self._coding = (b1["position"] + b1["offset"] + 1, b0["position"] + b0["offset"] + 1) + self._coding = (b1["position"] + b1["offset"], b0["position"] + b0["offset"] + 1) self._cds_len = abs((b0["position"] + b0["offset"]) - (b1["position"] + b1["offset"])) - self._exons_end = e0["position"] + self._exons_end = e1["position"] + self._exons_start = e0["position"] else: - self._coding = (b0["position"] + b0["offset"], b1["position"] + b1["offset"]) + self._coding = (b0["position"] + b0["offset"], b1["position"] +
b1["offset"] +1) self._cds_len = abs((b1["position"] + b1["offset"]) - (b0["position"] + b0["offset"])) self._exons_end = e1["position"] + self._exons_start = e0["position"] def _coordinate_to_coding(self, coordinate): """Convert a coordinate to a coding position (c./r.). @@ -89,7 +91,6 @@ def _coordinate_to_coding(self, coordinate): :returns dict: Coding position model (c./r.). """ noncoding_pos_m = self._noncoding.to_position(coordinate) - location = noncoding_pos_m["position"] if noncoding_pos_m["region"] == "": if location < self._coding[0]: @@ -124,10 +125,16 @@ def coordinate_to_coding(self, coordinate, degenerate=False): pos_m = self._coordinate_to_coding(coordinate) if degenerate: if pos_m["region"] == "u": - pos_m["position"] = pos_m["position"] + self._coding[0] + if self._inverted: + pos_m["position"] = pos_m["position"] + self._exons_start - self._coding[1] + 1 + else: + pos_m["position"] = pos_m["position"] + self._coding[0] pos_m["region"] = "-" if pos_m["region"] == "d": - pos_m["position"] = pos_m["position"] + self._exons_end - self._coding[1] + 1 + if self._inverted: + pos_m["position"] = pos_m["position"] + self._coding[0] + else: + pos_m["position"] = pos_m["position"] + self._exons_end - self._coding[1] + 1 pos_m["region"] = "*" return pos_m @@ -159,17 +166,31 @@ def coding_to_coordinate(self, pos_m): } # add checks for degenerate results? 
elif region == "-": - noncoding_pos_m = { - "position": self._coding[0] - pos_m["position"], - "offset": pos_m["offset"], - "region": "" - } + if pos_m["position"] > self._coding[0]: #degenerate result + noncoding_pos_m = { + "position": pos_m["position"] - self._coding[0], + "offset": pos_m["offset"], + "region": "u" + } + else: + noncoding_pos_m = { + "position": self._coding[0] - pos_m["position"], + "offset": pos_m["offset"], + "region": "" + } else: # * - noncoding_pos_m = { - "position": self._coding[1] + pos_m["position"] - 1, - "offset": pos_m["offset"], - "region": "" - } + if pos_m["position"] > self._coding[1]: + noncoding_pos_m = { + "position": pos_m["position"] - self._coding[1], + "offset": pos_m["offset"], + "region": "d" + } + else: + noncoding_pos_m = { + "position": self._coding[1] + pos_m["position"] - 1, + "offset": pos_m["offset"], + "region": "" + } return self._noncoding.to_coordinate(noncoding_pos_m) diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index dbaf87b..e2cccbe 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -300,7 +300,7 @@ def test_Coding_no_utr5_inverted(): crossmap.coordinate_to_coding, 19, crossmap.coding_to_coordinate, - {"position": 2, "offset": 0, "region": "-"}, + {"position": 1 , "offset": 0, "region": ""}, ) @@ -313,7 +313,7 @@ def test_Coding_no_utr3(): crossmap.coordinate_to_coding, 19, crossmap.coding_to_coordinate, - {"position": 9, "offset": 0, "region": "*"}, + {"position": 5, "offset": 0, "region": ""}, ) invariant( crossmap.coordinate_to_coding, @@ -508,10 +508,7 @@ def test_Coding_degenerate_return(): def test_Coding_inverted_degenerate_return(): """Degenerate upstream and downstream positions may be returned.""" crossmap = Coding([(10, 20)], (11, 19), True) - for i in range(0, 30): - print( - i, crossmap.coordinate_to_coding(i), crossmap.coordinate_to_coding(i, True) - ) + assert crossmap.coordinate_to_coding(20, True) == { "position": 2, @@ -571,6 +568,8 @@ def 
test_Coding_inverted_no_utr_degenerate(): [ {"position": 1, "offset": 0, "region": "u"}, {"position": 2, "offset": 1, "region": "u"}, + {"position": 1, "offset": 0, "region": "-"}, + ], ) degenerate_equal( @@ -617,7 +616,7 @@ def test_Coding_inverted_no_utr_degenerate_return(): crossmap = Coding([(10, 11)], (10, 11), True) assert crossmap.coordinate_to_coding(11, True) == { - "position": 3, + "position": 1, "offset": 0, "region": "-", } From 00e98bdc7df4643ac3eef909555e5338f8edf7b8 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Wed, 11 Mar 2026 09:17:46 +0100 Subject: [PATCH 052/127] Add degenerate tests --- tests/test_crossmapper.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index e2cccbe..788863b 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -477,6 +477,7 @@ def test_Coding_inverted_degenerate(): {"position": 1, "offset": 0, "region": "u"}, {"position": 2, "offset": 1, "region": "u"}, {"position": 0, "offset": -1, "region": "u"}, + {"position": 2, "offset": 0, "region": "-"}, ], ) degenerate_equal( @@ -485,6 +486,7 @@ def test_Coding_inverted_degenerate(): [ {"position": 1, "offset": 0, "region": "d"}, {"position": 2, "offset": -1, "region": "d"}, + {"position": 2, "offset": 0, "region": "*"}, ], ) @@ -546,6 +548,7 @@ def test_Coding_no_utr_degenerate(): [ {"position": 2, "offset": 1, "region": "u"}, {"position": 1, "offset": 0, "region": "u"}, + {"position": 1, "offset": 0, "region": "-"}, ], ) degenerate_equal( @@ -554,6 +557,7 @@ def test_Coding_no_utr_degenerate(): [ {"position": 1, "offset": 0, "region": "d"}, {"position": 2, "offset": -1, "region": "d"}, + {"position": 1, "offset": 0, "region": "*"}, ], ) @@ -578,6 +582,8 @@ def test_Coding_inverted_no_utr_degenerate(): [ {"position": 1, "offset": 0, "region": "d"}, {"position": 2, "offset": -1, "region": "d"}, + {"position": 1, "offset": 0, "region": "*"}, + {"position": 1, "offset": 1, 
"region": ""}, ], ) @@ -586,9 +592,6 @@ def test_Coding_no_utr_degenerate_return(): """UTRs may be missing.""" crossmap = Coding([(10, 11)], (10, 11)) - print(crossmap.coordinate_to_coding(11), crossmap.coordinate_to_coding(11, True)) - print(crossmap.coordinate_to_coding(12), crossmap.coordinate_to_coding(12, True)) - assert crossmap.coordinate_to_coding(8, True) == { "position": 2, "offset": 0, From e281d6423fc2b62f71b7a3ff55d961bb96417fb0 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Wed, 11 Mar 2026 09:51:59 +0100 Subject: [PATCH 053/127] Cleanup --- mutalyzer_crossmapper/crossmapper.py | 31 +++++++++++++++------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 65b07d9..ff2369a 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -146,49 +146,52 @@ def coding_to_coordinate(self, pos_m): :returns int: Coordinate. """ region = pos_m["region"] + position = pos_m["position"] + offset = pos_m["offset"] + if region == "u": noncoding_pos_m = { - "position": pos_m["position"] - pos_m["offset"], + "position": position - offset, "offset": 0, "region": "u" } elif region == "d": noncoding_pos_m = { - "position": pos_m["position"] + pos_m["offset"], + "position": position + offset, "offset": 0, "region": "d" } elif region == "": noncoding_pos_m = { - "position": pos_m["position"] + self._coding[0] -1, - "offset": pos_m["offset"], + "position": position + self._coding[0] -1, + "offset": offset, "region": "" } # add checks for degenerate results? 
elif region == "-": - if pos_m["position"] > self._coding[0]: #degenerate result + if position > self._coding[0]: noncoding_pos_m = { - "position": pos_m["position"] - self._coding[0], - "offset": pos_m["offset"], + "position": position - self._coding[0], + "offset": offset, "region": "u" } else: noncoding_pos_m = { - "position": self._coding[0] - pos_m["position"], - "offset": pos_m["offset"], + "position": self._coding[0] - position, + "offset": offset, "region": "" } else: # * - if pos_m["position"] > self._coding[1]: + if position > self._coding[1]: noncoding_pos_m = { - "position": pos_m["position"] - self._coding[1], - "offset": pos_m["offset"], + "position": position - self._coding[1], + "offset": offset, "region": "d" } else: noncoding_pos_m = { - "position": self._coding[1] + pos_m["position"] - 1, - "offset": pos_m["offset"], + "position": self._coding[1] + position - 1, + "offset": offset, "region": "" } return self._noncoding.to_coordinate(noncoding_pos_m) From 925cb568c0af9e44cd59a9bccb9eed145432498e Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Wed, 11 Mar 2026 09:54:13 +0100 Subject: [PATCH 054/127] Cleanup --- mutalyzer_crossmapper/crossmapper.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index ff2369a..c16fdf7 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -74,12 +74,12 @@ def __init__(self, locations, cds, inverted=False): if self._inverted: self._coding = (b1["position"] + b1["offset"], b0["position"] + b0["offset"] + 1) - self._cds_len = abs((b0["position"] + b0["offset"]) - (b1["position"] + b1["offset"])) + self._cds_len = (b0["position"] + b0["offset"]) - (b1["position"] + b1["offset"]) self._exons_end = e1["position"] self._exons_start = e0["position"] else: self._coding = (b0["position"] + b0["offset"], b1["position"] + b1["offset"] +1) - self._cds_len = 
abs((b1["position"] + b1["offset"]) - (b0["position"] + b0["offset"])) + self._cds_len = (b1["position"] + b1["offset"]) - (b0["position"] + b0["offset"]) self._exons_end = e1["position"] self._exons_start = e0["position"] @@ -202,7 +202,7 @@ def coordinate_to_protein(self, coordinate): :arg int coordinate: Coordinate. - :returns tuple: Protein position (p.). + :returns dict: Protein position model(p.). """ pos = self.coordinate_to_coding(coordinate) @@ -216,20 +216,20 @@ def coordinate_to_protein(self, coordinate): "position_in_codon": (pos["position"]+2) % 3 + 1, **{k: v for k, v in pos.items() if k != "position"}} - def protein_to_coordinate(self, position): + def protein_to_coordinate(self, pos_m): """Convert a protein position (p.) to a coordinate. - :arg tuple position: Protein position (p.). + :arg dict position: Protein position model(p.). :returns int: Coordinate. """ - if position["region"] in ["-", "*"]: + if pos_m["region"] in ["-", "*"]: return self.coding_to_coordinate( - {"position": 3 * position["position"] + position["position_in_codon"] - 3, - "offset": position["offset"], - "region": position["region"]}) + {"position": 3 * pos_m["position"] + pos_m["position_in_codon"] - 3, + "offset": pos_m["offset"], + "region": pos_m["region"]}) return self.coding_to_coordinate( - {"position": 3 * position["position"] + position["position_in_codon"] - 3, - "offset": position["offset"], - "region": position["region"]}) + {"position": 3 * pos_m["position"] + pos_m["position_in_codon"] - 3, + "offset": pos_m["offset"], + "region": pos_m["region"]}) From 5f2bef7a142ebe0d1bad23dcd437d861719dd09f Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Wed, 11 Mar 2026 11:55:38 +0100 Subject: [PATCH 055/127] Local copy before checkout --- tests/helper.py | 19 +++++++++++++++---- tests/test_crossmapper.py | 6 ++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/tests/helper.py b/tests/helper.py index a17b119..9e51367 100644 --- a/tests/helper.py +++ 
b/tests/helper.py @@ -2,8 +2,19 @@ def invariant(f, x, f_i, y): assert f(x) == y assert f_i(y) == x - def degenerate_equal(f, coordinate, locations): - assert f(locations[0]) == coordinate - assert len( - set(map(f, locations))) == 1 + results = [f(loc) for loc in locations] + + # First condition: first maps correctly + assert results[0] == coordinate, ( + f"\nFirst location: {locations[0]}" + f"\nExpected: {coordinate}" + f"\nGot: {results[0]}" + ) + + # Second condition: all map to same coordinate + assert len(set(results)) == 1, ( + f"\nLocations: {locations}" + f"\nResults: {results}" + f"\nExpected all to map to the same coordinate" + ) \ No newline at end of file diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index 788863b..d22715b 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -29,6 +29,12 @@ def test_NonCoding(): crossmap = NonCoding(_exons) # Boundary between upstream and transcript. + invariant( + crossmap.coordinate_to_noncoding, + 3, + crossmap.noncoding_to_coordinate, + {"position": 2, "offset": 0, "region": "u"}, + ) invariant( crossmap.coordinate_to_noncoding, 4, From 55fe7979a8f353a2cdee9d7792adda36619a099c Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Wed, 11 Mar 2026 15:41:39 +0100 Subject: [PATCH 056/127] Update table for Genomic and NonCoding classes in document --- README.rst | 129 ++++++++++------------------------------------------- 1 file changed, 24 insertions(+), 105 deletions(-) diff --git a/README.rst b/README.rst index afeb981..6559526 100644 --- a/README.rst +++ b/README.rst @@ -60,17 +60,17 @@ The ``Genomic`` class provides an interface for conversions between genomic posi Genomic Position Model ~~~~~~~~~~~~~~~~~~~~~~~ -Genomic positions follow the HGVS ``g`` coordinate system. They are represented as dictionaries: +Genomic positions follow the HGVS ``g`` coordinate system. They are represented as dictionaries. 
Below is an example of `g.1` in HGVS. .. code-block:: json { - "position": int + "position": 1 } Where: -- **position**: a positive integer +- **position**: a positive integer(>0) Genomic Position Conversion ~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -83,17 +83,6 @@ Genomic Position Conversion {"position": 1} >>> crossmap.genomic_to_coordinate({"position": 1}) 0 -Here is the mapping of coordinates to genomic positions: - -.. csv-table:: Coordinate to Genomic Position (0-4) - :header: "Coordinate", "Position" - - 0, 1 - 1, 2 - 2, 3 - 3, 4 - 4, 5 - ... NonCoding Class --------------- @@ -103,21 +92,21 @@ The ``NonCoding`` class provides conversions between noncoding positions and coo NonCoding Position Model ~~~~~~~~~~~~~~~~~~~~~~~ -Noncoding positions follow the HGVS ``n`` coordinate system. They are represented as dictionaries: +Noncoding positions follow the HGVS ``n`` coordinate system. They are represented as dictionaries. Below is an example of ``n.14+1`` in HGVS. .. code-block:: json { - "position": int, - "offset": int, - "region": str + "position": 10, + "offset": -5, + "region": '' } Where: -- **position**: a positive integer +- **position**: a positive integer (>0) - **offset**: an integer indicating the offset relative to the position (negative for upstream, positive for downstream) -- **region**: a string describing the region type (``""`` for standard, ``"u"`` for upstream, ``"d"`` for downstream) +- **region**: a string describing the region type (``''`` for standard, ``'u'`` for upstream, ``'d'`` for downstream) NonCoding Position Conversion ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -126,10 +115,10 @@ NonCoding Position Conversion >>> from mutalyzer_crossmapper import NonCoding >>> crossmap = NonCoding(_exons) - >>> crossmap.coordinate_to_noncoding(35) - {"position": 14, "offset": 1, "region": ""} - >>> crossmap.noncoding_to_coordinate({"position": 14, "offset": 1, "region": ""}) - 35 + >>> crossmap.coordinate_to_noncoding(25) + {"position": 10, "offset": -5, "region": ""} 
+ >>> crossmap.noncoding_to_coordinate({"position": 10, "offset": -5, "region": ''}) + 25 Notes ~~~~~ @@ -140,88 +129,18 @@ Here is the mapping of coordinates to noncoding positions: .. csv-table:: :class: table-scroll - :header: "Coordinate", "Position", "Offset", "Region" + :header: "Coordinate", "Position", "Offset", "Region", "HGVS" - "0", "5","0", "u" - "1", "4","0", "u" - "2", "3","0", "u" - "3", "2","0", "u" - "4", "1","0", "u" - "5", "1","0", "" - "6", "2","0", "" - "7", "3","0", "" - "8", "3","1", "" - "9", "3","2", "" - "10", "3","3", "" - "11", "4","-3", "" - "12", "4","-2", "" - "13", "4","-1", "" - "14", "4","0", "" - "15", "5","0", "" - "16", "6","0", "" - "17", "7","0", "" - "18", "8","0", "" - "19", "9","0", "" - "20", "9","1", "" - "21", "9","2", "" - "22", "9","3", "" - "23", "9","4", "" - "24", "9","5", "" - "25", "10","-5", "" - "26", "10","-4", "" - "27", "10","-3", "" - "28", "10","-2", "" - "29", "10","-1", "" - "30", "10","0", "" - "31", "11","0", "" - "32", "12","0", "" - "33", "13","0", "" - "34", "14","0", "" - "35", "14","1", "" - "36", "14","2", "" - "37", "14","3", "" - "38", "15","-2", "" - "39", "15","-1", "" - "40", "15","0", "" - "41", "16","0", "" - "42", "17","0", "" - "43", "18","0", "" - "44", "18","1", "" - "45", "18","2", "" - "46", "18","3", "" - "47", "19","-3", "" - "48", "19","-2", "" - "49", "19","-1", "" - "50", "19","0", "" - "51", "20","0", "" - "52", "20","1", "" - "53", "20","2", "" - "54", "20","3", "" - "55", "20","4", "" - "56", "20","5", "" - "57", "20","6", "" - "58", "20","7", "" - "59", "20","8", "" - "60", "20","9", "" - "61", "21","-9", "" - "62", "21","-8", "" - "63", "21","-7", "" - "64", "21","-6", "" - "65", "21","-5", "" - "66", "21","-4", "" - "67", "21","-3", "" - "68", "21","-2", "" - "69", "21","-1", "" - "70", "21","0", "" - "71", "22","0", "" - "72", "1","0", "d" - "73", "2","0", "d" - "74", "3","0", "d" - "75", "4","0", "d" - "76", "5","0", "d" - "77", "6","0", "d" - "78", "7","0", "d" - "79", 
"8","0", "d" + "4", "1","0", "u", "n.u1" + "5", "1","0", "", "n.1" + ... + "24", "9","5", "", "n.9+5" + "25", "10","-5", "", "n.10-5" + ... + "71", "22","0", "", "n.22" + "72", "1","0", "d", "n.d1" + ... + "79", "8","0", "d", "n.d8" From a2003b362a95c9834d3a8c004483bf0344c0d403 Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Wed, 11 Mar 2026 15:52:12 +0100 Subject: [PATCH 057/127] Update table for Coding class in document --- README.rst | 89 ++++++------------------------------------------------ 1 file changed, 10 insertions(+), 79 deletions(-) diff --git a/README.rst b/README.rst index 6559526..ece04cf 100644 --- a/README.rst +++ b/README.rst @@ -133,13 +133,10 @@ Here is the mapping of coordinates to noncoding positions: "4", "1","0", "u", "n.u1" "5", "1","0", "", "n.1" - ... "24", "9","5", "", "n.9+5" "25", "10","-5", "", "n.10-5" - ... "71", "22","0", "", "n.22" "72", "1","0", "d", "n.d1" - ... "79", "8","0", "d", "n.d8" @@ -152,14 +149,14 @@ The ``Coding`` class provides conversions between coding positions and coordinat Coding Position Model ~~~~~~~~~~~~~~~~~~~~ -Coding positions follow the HGVS ``c`` coordinate system. They are represented as dictionaries: +Coding positions follow the HGVS ``c`` coordinate system. They are represented as dictionaries. Here is an example of ``c.*1+3``. .. 
code-block:: json { - "position": int, - "offset": int, - "region": str + "position": 1, + "offset": 3, + "region": '*' } Where: @@ -175,10 +172,10 @@ Coding Position Conversion >>> from mutalyzer_crossmapper import Coding >>> crossmap = Coding(_exons, _cds) - >>> crossmap.coordinate_to_coding(31) - {"position": -1, "offset": 0, "region": '-'} - >>> crossmap.coding_to_coordinate({"position": -1, "offset": 0, "region": '-'}) - 31 + >>> crossmap.coordinate_to_coding(46) + {"position": 1, "offset": 3, "region": '*'} + >>> crossmap.coding_to_coordinate({"position": 1, "offset": 3, "region": '*'}) + 46 Notes ~~~~~ @@ -189,88 +186,22 @@ Here is the mapping of coordinates to coding positions: .. csv-table:: :class: table-scroll - :header: "Coordinate", "Position", "Offset", "Region" + :header: "Coordinate", "Position", "Offset", "Region", "HGVS" - "0", "5","0", "u" - "1", "4","0", "u" - "2", "3","0", "u" - "3", "2","0", "u" "4", "1","0", "u" "5", "11","0", '-' - "6", "10","0", '-' - "7", "9","0", '-' - "8", "9","1", '-' - "9", "9","2", '-' - "10", "9","3", '-' - "11", "8","-3", '-' - "12", "8","-2", '-' - "13", "8","-1", '-' - "14", "8","0", '-' - "15", "7","0", '-' - "16", "6","0", '-' - "17", "5","0", '-' - "18", "4","0", '-' - "19", "3","0", '-' - "20", "3","1", '-' - "21", "3","2", '-' - "22", "3","3", '-' - "23", "3","4", '-' "24", "3","5", '-' "25", "2","-5", '-' - "26", "2","-4", '-' - "27", "2","-3", '-' - "28", "2","-2", '-' - "29", "2","-1", '-' - "30", "2","0", '-' "31", "1","0", '-' "32", "1","0", "" - "33", "2","0", "" - "34", "3","0", "" - "35", "3","1", "" - "36", "3","2", "" "37", "3","3", "" "38", "4","-2", "" - "39", "4","-1", "" - "40", "4","0", "" - "41", "5","0", "" - "42", "6","0", "" "43", "1","0", '*' - "44", "1","1", '*' - "45", "1","2", '*' - "46", "1","3", '*' - "47", "2","-3", '*' - "48", "2","-2", '*' - "49", "2","-1", '*' - "50", "2","0", '*' - "51", "3","0", '*' - "52", "3","1", '*' - "53", "3","2", '*' - "54", "3","3", '*' - "55", "3","4", 
'*' - "56", "3","5", '*' - "57", "3","6", '*' - "58", "3","7", '*' - "59", "3","8", '*' "60", "3","9", '*' "61", "4","-9", '*' - "62", "4","-8", '*' - "63", "4","-7", '*' - "64", "4","-6", '*' - "65", "4","-5", '*' - "66", "4","-4", '*' - "67", "4","-3", '*' - "68", "4","-2", '*' - "69", "4","-1", '*' - "70", "4","0", '*' "71", "5","0", '*' "72", "1","0", "d" - "73", "2","0", "d" - "74", "3","0", "d" - "75", "4","0", "d" - "76", "5","0", "d" - "77", "6","0", "d" - "78", "7","0", "d" - "79", "8","0", "d" + From 4d9089a500e500e8a52c6194490ac72350c61051 Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Wed, 11 Mar 2026 15:58:54 +0100 Subject: [PATCH 058/127] Add HGVS column in coding table --- README.rst | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/README.rst b/README.rst index ece04cf..f316b9f 100644 --- a/README.rst +++ b/README.rst @@ -188,19 +188,19 @@ Here is the mapping of coordinates to coding positions: :class: table-scroll :header: "Coordinate", "Position", "Offset", "Region", "HGVS" - "4", "1","0", "u" - "5", "11","0", '-' - "24", "3","5", '-' - "25", "2","-5", '-' - "31", "1","0", '-' - "32", "1","0", "" - "37", "3","3", "" - "38", "4","-2", "" - "43", "1","0", '*' - "60", "3","9", '*' - "61", "4","-9", '*' - "71", "5","0", '*' - "72", "1","0", "d" + "4", "1","0", "u", "c.u1" + "5", "11","0", '-', "c.-11" + "24", "3","5", '-', "c.-3+5" + "25", "2","-5", '-', "c.-2-5" + "31", "1","0", '-', "c.-1" + "32", "1","0", "", "c.1" + "37", "3","3", "", "c.3+3" + "38", "4","-2", "", "c.4-2" + "43", "1","0", '*', "c.*1" + "60", "3","9", '*', "c.*3+9" + "61", "4","-9", '*', "c.*4+9" + "71", "5","0", '*', "c.*5" + "79", "8","0", "d", "c.d8" From 15428949bea49d2dcab35cefa27b0a20aa680a72 Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Wed, 11 Mar 2026 16:06:51 +0100 Subject: [PATCH 059/127] Fix syntax --- README.rst | 26 
+++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/README.rst b/README.rst index f316b9f..86ccdb0 100644 --- a/README.rst +++ b/README.rst @@ -116,7 +116,7 @@ NonCoding Position Conversion >>> from mutalyzer_crossmapper import NonCoding >>> crossmap = NonCoding(_exons) >>> crossmap.coordinate_to_noncoding(25) - {"position": 10, "offset": -5, "region": ""} + {"position": 10, "offset": -5, "region": ''} >>> crossmap.noncoding_to_coordinate({"position": 10, "offset": -5, "region": ''}) 25 @@ -131,13 +131,13 @@ Here is the mapping of coordinates to noncoding positions: :class: table-scroll :header: "Coordinate", "Position", "Offset", "Region", "HGVS" - "4", "1","0", "u", "n.u1" - "5", "1","0", "", "n.1" - "24", "9","5", "", "n.9+5" - "25", "10","-5", "", "n.10-5" - "71", "22","0", "", "n.22" - "72", "1","0", "d", "n.d1" - "79", "8","0", "d", "n.d8" + "4", "1","0", "'u'", "n.u1" + "5", "1","0", "''", "n.1" + "24", "9","5", "''", "n.9+5" + "25", "10","-5", "''", "n.10-5" + "71", "22","0", "''", "n.22" + "72", "1","0", "'d'", "n.d1" + "79", "8","0", "'d'", "n.d8" @@ -188,19 +188,19 @@ Here is the mapping of coordinates to coding positions: :class: table-scroll :header: "Coordinate", "Position", "Offset", "Region", "HGVS" - "4", "1","0", "u", "c.u1" + "4", "1","0", "'u'", "c.u1" "5", "11","0", '-', "c.-11" "24", "3","5", '-', "c.-3+5" "25", "2","-5", '-', "c.-2-5" "31", "1","0", '-', "c.-1" - "32", "1","0", "", "c.1" - "37", "3","3", "", "c.3+3" - "38", "4","-2", "", "c.4-2" + "32", "1","0", "''", "c.1" + "37", "3","3", "''", "c.3+3" + "38", "4","-2", "''", "c.4-2" "43", "1","0", '*', "c.*1" "60", "3","9", '*', "c.*3+9" "61", "4","-9", '*', "c.*4+9" "71", "5","0", '*', "c.*5" - "79", "8","0", "d", "c.d8" + "79", "8","0", "'d'", "c.d8" From 75a5cba52247e006eb42e013d1c66427bc8059de Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Wed, 11 Mar 2026 16:14:22 +0100 Subject: [PATCH 060/127] Update 
protein example in document --- README.rst | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/README.rst b/README.rst index 86ccdb0..b1749f1 100644 --- a/README.rst +++ b/README.rst @@ -64,9 +64,7 @@ Genomic positions follow the HGVS ``g`` coordinate system. They are represented .. code-block:: json - { - "position": 1 - } + {"position": 1} Where: @@ -211,15 +209,15 @@ Protein Protein Position Model ~~~~~~~~~~~~~~~~~~~~~~ -Protein positions follow the HGVS ``p`` coordinate system. They are represented as dictionaries: +Protein positions follow the HGVS ``p`` coordinate system. They are represented as dictionaries. Here is an example of ``p.1`` in HGVS. .. code-block:: json { - "position": int, - "position_in_codon": int, - "offset": int, - "region": str + "position": 1, + "position_in_codon": 3, + "offset": 3, + "region": '' } Where: @@ -227,7 +225,7 @@ Where: - **position**: the amino acid position (1-based) - **position_in_codon**: the codon nucleotide index (1, 2, or 3) - **offset**: an integer indicating offset relative to the codon -- **region**: a string describing the region type (``""`` for standard positions) +- **region**: a string describing the region type (``''`` for standard positions) Protein Position Conversion ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -236,10 +234,10 @@ Conversions between protein positions and coordinates: .. code-block:: python - >>> crossmap.coordinate_to_protein(41) - {"position": 2, "position_in_codon": 2, "offset": 1, "region": ""} - >>> crossmap.protein_to_coordinate({"position": 2, "position_in_codon": 2, "offset": 1, "region": ""}) - 41 + >>> crossmap.coordinate_to_protein(37) + {"position": 1, "position_in_codon": 3, "offset": 3, "region": ""} + >>> crossmap.protein_to_coordinate({"position": 1, "position_in_codon": 3, "offset": 3, "region": ""}) + 37 .. 
_numbering: http://varnomen.hgvs.org/bg-material/numbering/ From b0ed66feeceab9cf802f59d7960a5c4253f18690 Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Thu, 12 Mar 2026 17:14:42 +0100 Subject: [PATCH 061/127] Add protein table in document --- README.rst | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index b1749f1..a31322f 100644 --- a/README.rst +++ b/README.rst @@ -97,7 +97,7 @@ Noncoding positions follow the HGVS ``n`` coordinate system. They are represente { "position": 10, "offset": -5, - "region": '' + "region": "" } Where: @@ -217,7 +217,7 @@ Protein positions follow the HGVS ``p`` coordinate system. They are represented "position": 1, "position_in_codon": 3, "offset": 3, - "region": '' + "region": "" } Where: @@ -240,5 +240,26 @@ Conversions between protein positions and coordinates: 37 +Here is the mapping of coordinates to coding positions: + +.. csv-table:: + :class: table-scroll + :header: "Coordinate", "Position", "position_in_codon", "Offset", "Region", "HGVS" + + "0", "4", "2", "0", "'u'", + "4", "4", "2", "0", "'u'", + "5", "4", "2", "0", '-', + "6", "4", "3", "0", '-', + "7", "3", "1", "0", '-', + "31", "1", "3", "0", '-', + "32", "1", "1", "0", "''", "p.1" + "42", "2", "3", "0", "''", "p.2" + "43", "1", "1", "0", '*', + "44", "1", "1", "1", '*', + "79", "2", "2", "0", "'d'", + + + + .. _numbering: http://varnomen.hgvs.org/bg-material/numbering/ .. 
_ReadTheDocs: https://mutalyzer-crossmapper.readthedocs.io From 16e9a62261606689df9f3336d6f22bd3ed59d680 Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Thu, 12 Mar 2026 17:15:33 +0100 Subject: [PATCH 062/127] Fix typo --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index a31322f..29f15d1 100644 --- a/README.rst +++ b/README.rst @@ -240,7 +240,7 @@ Conversions between protein positions and coordinates: 37 -Here is the mapping of coordinates to coding positions: +Here is the mapping of coordinates to protein positions: .. csv-table:: :class: table-scroll From d6c868d168ed855ed98eb755c69285a7b95ee2e1 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 13 Mar 2026 09:17:35 +0100 Subject: [PATCH 063/127] Add tests --- tests/test_crossmapper.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index d22715b..40546b8 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -107,6 +107,7 @@ def test_NonCoding_degenerate(): [ {"position": 1, "offset": 0, "region": "u"}, {"position": 0, "offset": -1, "region": "u"}, + {"position": 1, "offset": -1, "region": ""}, ], ) @@ -117,6 +118,7 @@ def test_NonCoding_degenerate(): [ {"position": 1, "offset": 0, "region": "d"}, {"position": 0, "offset": 1, "region": "d"}, + {"position": 22, "offset": 1, "region": ""}, ], ) @@ -129,14 +131,18 @@ def test_NonCoding_inverted_degenerate(): degenerate_equal( crossmap.noncoding_to_coordinate, 72, - [{"position": 1, "offset": 0, "region": "u"}], + [ + {"position": 1, "offset": 0, "region": "u"}, + {"position": 1, "offset": -1, "region": ""},], ) # Boundary between downstream and transcript. 
degenerate_equal( crossmap.noncoding_to_coordinate, 4, - [{"position": 1, "offset": 0, "region": "d"}], + [ + {"position": 1, "offset": 0, "region": "d"}, + {"position": 22, "offset": 1, "region": ""},], ) @@ -459,6 +465,8 @@ def test_Coding_degenerate(): {"position": 1, "offset": 0, "region": "u"}, {"position": 2, "offset": 1, "region": "u"}, {"position": 0, "offset": -1, "region": "u"}, + {"position": 1, "offset": -1, "region": "-"}, + {"position": 1, "offset": -2, "region": ""}, ], ) degenerate_equal( @@ -468,6 +476,9 @@ def test_Coding_degenerate(): {"position": 1, "offset": 0, "region": "d"}, {"position": 8, "offset": -7, "region": "d"}, {"position": 0, "offset": -1, "region": "d"}, + {"position": 8, "offset": 2, "region": ""}, + {"position": 2, "offset": 0, "region": "*"}, + {"position": 1, "offset": 1, "region": "*"}, ], ) @@ -483,7 +494,10 @@ def test_Coding_inverted_degenerate(): {"position": 1, "offset": 0, "region": "u"}, {"position": 2, "offset": 1, "region": "u"}, {"position": 0, "offset": -1, "region": "u"}, + {"position": 1, "offset": -2, "region": ""}, + {"position": 2, "offset": -3, "region": ""}, {"position": 2, "offset": 0, "region": "-"}, + {"position": 1, "offset": -1, "region": "-"}, ], ) degenerate_equal( @@ -492,6 +506,8 @@ def test_Coding_inverted_degenerate(): [ {"position": 1, "offset": 0, "region": "d"}, {"position": 2, "offset": -1, "region": "d"}, + {"position": 8, "offset": 2, "region": ""}, + {"position": 7, "offset": 3, "region": ""}, {"position": 2, "offset": 0, "region": "*"}, ], ) @@ -555,6 +571,9 @@ def test_Coding_no_utr_degenerate(): {"position": 2, "offset": 1, "region": "u"}, {"position": 1, "offset": 0, "region": "u"}, {"position": 1, "offset": 0, "region": "-"}, + {"position": 2, "offset": 1, "region": "-"}, + {"position": 1, "offset": -1, "region": ""}, + {"position": 2, "offset": -2, "region": ""}, ], ) degenerate_equal( @@ -564,6 +583,9 @@ def test_Coding_no_utr_degenerate(): {"position": 1, "offset": 0, "region": "d"}, 
{"position": 2, "offset": -1, "region": "d"}, {"position": 1, "offset": 0, "region": "*"}, + {"position": 2, "offset": -1, "region": "*"}, + {"position": 3, "offset": -2, "region": "*"}, + {"position": 1, "offset": 1, "region": ""}, ], ) @@ -579,7 +601,8 @@ def test_Coding_inverted_no_utr_degenerate(): {"position": 1, "offset": 0, "region": "u"}, {"position": 2, "offset": 1, "region": "u"}, {"position": 1, "offset": 0, "region": "-"}, - + {"position": 2, "offset": 1, "region": "-"}, + {"position": 1, "offset": -1, "region": ""}, ], ) degenerate_equal( @@ -645,7 +668,7 @@ def test_Coding_protein(): crossmap.coordinate_to_protein, 31, crossmap.protein_to_coordinate, - {"position": 1, "position_in_codon": 1, "offset": 0, "region": "-"}, + {"position": 1, "position_in_codon": 3, "offset": 0, "region": "-"}, ) invariant( crossmap.coordinate_to_protein, From 41bd3d5939a758ca057c7f941a1d7ad72236f23d Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 13 Mar 2026 09:26:18 +0100 Subject: [PATCH 064/127] Refactor crossmap --- mutalyzer_crossmapper/crossmapper.py | 50 ++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index c16fdf7..a714902 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -75,12 +75,12 @@ def __init__(self, locations, cds, inverted=False): if self._inverted: self._coding = (b1["position"] + b1["offset"], b0["position"] + b0["offset"] + 1) self._cds_len = (b0["position"] + b0["offset"]) - (b1["position"] + b1["offset"]) - self._exons_end = e1["position"] + self._exons = (e1["position"], e0["position"]) self._exons_start = e0["position"] else: self._coding = (b0["position"] + b0["offset"], b1["position"] + b1["offset"] +1) self._cds_len = (b1["position"] + b1["offset"]) - (b0["position"] + b0["offset"]) - self._exons_end = e1["position"] + self._exons = (e0["position"], e1["position"]) 
self._exons_start = e0["position"] def _coordinate_to_coding(self, coordinate): @@ -126,7 +126,7 @@ def coordinate_to_coding(self, coordinate, degenerate=False): if degenerate: if pos_m["region"] == "u": if self._inverted: - pos_m["position"] = pos_m["position"] + self._exons_start - self._coding[1] + 1 + pos_m["position"] = pos_m["position"] + self._exons[1] - self._coding[1] + 1 else: pos_m["position"] = pos_m["position"] + self._coding[0] pos_m["region"] = "-" @@ -134,7 +134,7 @@ def coordinate_to_coding(self, coordinate, degenerate=False): if self._inverted: pos_m["position"] = pos_m["position"] + self._coding[0] else: - pos_m["position"] = pos_m["position"] + self._exons_end - self._coding[1] + 1 + pos_m["position"] = pos_m["position"] + self._exons[1]- self._coding[1] + 1 pos_m["region"] = "*" return pos_m @@ -171,8 +171,8 @@ def coding_to_coordinate(self, pos_m): elif region == "-": if position > self._coding[0]: noncoding_pos_m = { - "position": position - self._coding[0], - "offset": offset, + "position": position - self._coding[0] - offset, + "offset": 0, "region": "u" } else: @@ -182,10 +182,10 @@ def coding_to_coordinate(self, pos_m): "region": "" } else: # * - if position > self._coding[1]: + if position > self._coding[0]: noncoding_pos_m = { - "position": position - self._coding[1], - "offset": offset, + "position": position - self._coding[0] + offset, + "offset": 0, "region": "d" } else: @@ -206,10 +206,30 @@ def coordinate_to_protein(self, coordinate): """ pos = self.coordinate_to_coding(coordinate) - if pos["region"] in ["-", "*"]: + if pos["region"] == "u": + pos = self.coordinate_to_coding(coordinate + pos["position"]) return { "position": pos["position"] // 3 + 1, "position_in_codon": pos["position"] % 3, + "region": "u", + **{k: v for k, v in pos.items() if k not in ["position", "region"]}} + elif pos["region"] == "d": + pos = self.coordinate_to_coding(coordinate - pos["position"]) + return { + "position": pos["position"] // 3 + 1, + 
"position_in_codon": pos["position"] % 3, + "region": "d", + **{k: v for k, v in pos.items() if k not in ["position", "region"]}} + + if pos["region"] == "-": + return { + "position": (pos["position"]+2) // 3, + "position_in_codon": -pos["position"] % 3 + 1, + **{k: v for k, v in pos.items() if k != "position"}} + if pos["region"] == "*": + return { + "position": (pos["position"]+2)// 3, + "position_in_codon": (pos["position"] + 2) % 3 + 1, **{k: v for k, v in pos.items() if k != "position"}} return { "position": (pos["position"]+2) // 3, @@ -223,12 +243,16 @@ def protein_to_coordinate(self, pos_m): :returns int: Coordinate. """ - if pos_m["region"] in ["-", "*"]: + if pos_m["region"] == "-": return self.coding_to_coordinate( - {"position": 3 * pos_m["position"] + pos_m["position_in_codon"] - 3, + {"position": 3 * pos_m["position"] - pos_m["position_in_codon"] + 1, "offset": pos_m["offset"], "region": pos_m["region"]}) - + if pos_m["region"] == "*": + return self.coding_to_coordinate( + {"position": 3 * pos_m["position"] + pos_m["position_in_codon"] - 3, + "offset": pos_m["offset"], + "region": pos_m["region"]}) return self.coding_to_coordinate( {"position": 3 * pos_m["position"] + pos_m["position_in_codon"] - 3, "offset": pos_m["offset"], From 17523633f4aa100272199aa37f49aedc6af94a21 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 13 Mar 2026 09:44:30 +0100 Subject: [PATCH 065/127] Cleanup --- mutalyzer_crossmapper/crossmapper.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index a714902..dfdb23d 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -92,6 +92,7 @@ def _coordinate_to_coding(self, coordinate): """ noncoding_pos_m = self._noncoding.to_position(coordinate) location = noncoding_pos_m["position"] + if noncoding_pos_m["region"] == "": if location < self._coding[0]: return { @@ 
-123,18 +124,21 @@ def coordinate_to_coding(self, coordinate, degenerate=False): :returns dict: Coding position model (c./r.). """ pos_m = self._coordinate_to_coding(coordinate) + region = pos_m["region"] + position = pos_m["position"] + if degenerate: - if pos_m["region"] == "u": + if region == "u": if self._inverted: - pos_m["position"] = pos_m["position"] + self._exons[1] - self._coding[1] + 1 + pos_m["position"] = position + self._exons[1] - self._coding[1] + 1 else: - pos_m["position"] = pos_m["position"] + self._coding[0] + pos_m["position"] = position + self._coding[0] pos_m["region"] = "-" - if pos_m["region"] == "d": + if region == "d": if self._inverted: - pos_m["position"] = pos_m["position"] + self._coding[0] + pos_m["position"] = position + self._coding[0] else: - pos_m["position"] = pos_m["position"] + self._exons[1]- self._coding[1] + 1 + pos_m["position"] = position + self._exons[1]- self._coding[1] + 1 pos_m["region"] = "*" return pos_m @@ -167,9 +171,8 @@ def coding_to_coordinate(self, pos_m): "offset": offset, "region": "" } - # add checks for degenerate results? 
elif region == "-": - if position > self._coding[0]: + if position > self._coding[0]: # correct it to 'u' noncoding_pos_m = { "position": position - self._coding[0] - offset, "offset": 0, @@ -182,7 +185,7 @@ def coding_to_coordinate(self, pos_m): "region": "" } else: # * - if position > self._coding[0]: + if position > self._coding[0]: # correct it to 'd' noncoding_pos_m = { "position": position - self._coding[0] + offset, "offset": 0, @@ -213,7 +216,8 @@ def coordinate_to_protein(self, coordinate): "position_in_codon": pos["position"] % 3, "region": "u", **{k: v for k, v in pos.items() if k not in ["position", "region"]}} - elif pos["region"] == "d": + + if pos["region"] == "d": pos = self.coordinate_to_coding(coordinate - pos["position"]) return { "position": pos["position"] // 3 + 1, @@ -226,11 +230,13 @@ def coordinate_to_protein(self, coordinate): "position": (pos["position"]+2) // 3, "position_in_codon": -pos["position"] % 3 + 1, **{k: v for k, v in pos.items() if k != "position"}} + if pos["region"] == "*": return { "position": (pos["position"]+2)// 3, "position_in_codon": (pos["position"] + 2) % 3 + 1, **{k: v for k, v in pos.items() if k != "position"}} + return { "position": (pos["position"]+2) // 3, "position_in_codon": (pos["position"]+2) % 3 + 1, From 31f483adcf45b41823693aa4c5adad8fdcb8f668 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 13 Mar 2026 09:46:06 +0100 Subject: [PATCH 066/127] Cleanup --- mutalyzer_crossmapper/crossmapper.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index dfdb23d..cf751da 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -76,12 +76,10 @@ def __init__(self, locations, cds, inverted=False): self._coding = (b1["position"] + b1["offset"], b0["position"] + b0["offset"] + 1) self._cds_len = (b0["position"] + b0["offset"]) - (b1["position"] + b1["offset"]) self._exons = (e1["position"], 
e0["position"]) - self._exons_start = e0["position"] else: self._coding = (b0["position"] + b0["offset"], b1["position"] + b1["offset"] +1) self._cds_len = (b1["position"] + b1["offset"]) - (b0["position"] + b0["offset"]) self._exons = (e0["position"], e1["position"]) - self._exons_start = e0["position"] def _coordinate_to_coding(self, coordinate): """Convert a coordinate to a coding position (c./r.). From eeacf85fac8dab54e1cf9bcf4d55eafb46c46e9a Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 13 Mar 2026 09:47:34 +0100 Subject: [PATCH 067/127] Cleanup --- mutalyzer_crossmapper/crossmapper.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index cf751da..d1a4cec 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -104,12 +104,11 @@ def _coordinate_to_coding(self, coordinate): "offset": noncoding_pos_m["offset"], "region": "*" } - else: - return { - "position": location - self._coding[0] + 1, - "offset": noncoding_pos_m["offset"], - "region": "" - } + return { + "position": location - self._coding[0] + 1, + "offset": noncoding_pos_m["offset"], + "region": "" + } else: return noncoding_pos_m From bb8ee668620aa8be644ad412a10031d52d9493e6 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 13 Mar 2026 09:50:57 +0100 Subject: [PATCH 068/127] Cleanup --- mutalyzer_crossmapper/crossmapper.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index d1a4cec..31de6c6 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -251,11 +251,6 @@ def protein_to_coordinate(self, pos_m): {"position": 3 * pos_m["position"] - pos_m["position_in_codon"] + 1, "offset": pos_m["offset"], "region": pos_m["region"]}) - if pos_m["region"] == "*": - return self.coding_to_coordinate( - {"position": 3 * pos_m["position"] + 
pos_m["position_in_codon"] - 3, - "offset": pos_m["offset"], - "region": pos_m["region"]}) return self.coding_to_coordinate( {"position": 3 * pos_m["position"] + pos_m["position_in_codon"] - 3, "offset": pos_m["offset"], From 87f507d0b8916c5764d448ad467d74dd89d82fda Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 13 Mar 2026 11:38:43 +0100 Subject: [PATCH 069/127] Add missing tests --- tests/test_crossmapper.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index 40546b8..00caa14 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -106,7 +106,6 @@ def test_NonCoding_degenerate(): 4, [ {"position": 1, "offset": 0, "region": "u"}, - {"position": 0, "offset": -1, "region": "u"}, {"position": 1, "offset": -1, "region": ""}, ], ) @@ -117,8 +116,7 @@ def test_NonCoding_degenerate(): 72, [ {"position": 1, "offset": 0, "region": "d"}, - {"position": 0, "offset": 1, "region": "d"}, - {"position": 22, "offset": 1, "region": ""}, + {"position": 23, "offset": 0, "region": ""}, ], ) @@ -132,8 +130,8 @@ def test_NonCoding_inverted_degenerate(): crossmap.noncoding_to_coordinate, 72, [ - {"position": 1, "offset": 0, "region": "u"}, - {"position": 1, "offset": -1, "region": ""},], + {"position": 1, "offset": -1, "region": ""}, + {"position": 1, "offset": 0, "region": "u"},], ) # Boundary between downstream and transcript. 
@@ -463,22 +461,19 @@ def test_Coding_degenerate(): 9, [ {"position": 1, "offset": 0, "region": "u"}, - {"position": 2, "offset": 1, "region": "u"}, - {"position": 0, "offset": -1, "region": "u"}, - {"position": 1, "offset": -1, "region": "-"}, + {"position": 2, "offset": 0, "region": "-"}, {"position": 1, "offset": -2, "region": ""}, + {"position": 1, "offset": -10, "region": "*"}, ], ) degenerate_equal( crossmap.coding_to_coordinate, 20, [ - {"position": 1, "offset": 0, "region": "d"}, - {"position": 8, "offset": -7, "region": "d"}, - {"position": 0, "offset": -1, "region": "d"}, - {"position": 8, "offset": 2, "region": ""}, - {"position": 2, "offset": 0, "region": "*"}, {"position": 1, "offset": 1, "region": "*"}, + {"position": 2, "offset": 0, "region": "*"}, + {"position": 8, "offset": 2, "region": ""}, + {"position": 1, "offset": 10, "region": "-"}, ], ) From e5d734252aa7fef41bd3eebf6f60f2187653abeb Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 13 Mar 2026 13:18:51 +0100 Subject: [PATCH 070/127] Add missing tests --- tests/test_crossmapper.py | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index 00caa14..55a9a92 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -487,12 +487,9 @@ def test_Coding_inverted_degenerate(): 20, [ {"position": 1, "offset": 0, "region": "u"}, - {"position": 2, "offset": 1, "region": "u"}, - {"position": 0, "offset": -1, "region": "u"}, - {"position": 1, "offset": -2, "region": ""}, - {"position": 2, "offset": -3, "region": ""}, {"position": 2, "offset": 0, "region": "-"}, - {"position": 1, "offset": -1, "region": "-"}, + {"position": 1, "offset": -2, "region": ""}, + {"position": 1, "offset": -10, "region": "*"}, ], ) degenerate_equal( @@ -500,10 +497,10 @@ def test_Coding_inverted_degenerate(): 9, [ {"position": 1, "offset": 0, "region": "d"}, - {"position": 2, "offset": -1, "region": "d"}, - 
{"position": 8, "offset": 2, "region": ""}, - {"position": 7, "offset": 3, "region": ""}, {"position": 2, "offset": 0, "region": "*"}, + {"position": 8, "offset": 2, "region": ""}, + {"position": 1, "offset": 10, "region": "-"}, + ], ) @@ -563,12 +560,9 @@ def test_Coding_no_utr_degenerate(): crossmap.coding_to_coordinate, 9, [ - {"position": 2, "offset": 1, "region": "u"}, {"position": 1, "offset": 0, "region": "u"}, {"position": 1, "offset": 0, "region": "-"}, - {"position": 2, "offset": 1, "region": "-"}, - {"position": 1, "offset": -1, "region": ""}, - {"position": 2, "offset": -2, "region": ""}, + {"position": 1, "offset": -2, "region": "*"}, ], ) degenerate_equal( @@ -576,11 +570,8 @@ def test_Coding_no_utr_degenerate(): 11, [ {"position": 1, "offset": 0, "region": "d"}, - {"position": 2, "offset": -1, "region": "d"}, {"position": 1, "offset": 0, "region": "*"}, - {"position": 2, "offset": -1, "region": "*"}, - {"position": 3, "offset": -2, "region": "*"}, - {"position": 1, "offset": 1, "region": ""}, + {"position": 1, "offset": 2, "region": "-"}, ], ) @@ -594,10 +585,8 @@ def test_Coding_inverted_no_utr_degenerate(): 11, [ {"position": 1, "offset": 0, "region": "u"}, - {"position": 2, "offset": 1, "region": "u"}, {"position": 1, "offset": 0, "region": "-"}, - {"position": 2, "offset": 1, "region": "-"}, - {"position": 1, "offset": -1, "region": ""}, + {"position": 1, "offset": -2, "region": "*"}, ], ) degenerate_equal( @@ -605,9 +594,8 @@ def test_Coding_inverted_no_utr_degenerate(): 9, [ {"position": 1, "offset": 0, "region": "d"}, - {"position": 2, "offset": -1, "region": "d"}, {"position": 1, "offset": 0, "region": "*"}, - {"position": 1, "offset": 1, "region": ""}, + {"position": 1, "offset": 2, "region": "-"}, ], ) From 53954d3bc26c4ec6feccd61d9504901ff45fc6ce Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 13 Mar 2026 13:21:48 +0100 Subject: [PATCH 071/127] Refactor --- mutalyzer_crossmapper/crossmapper.py | 14 ++------------ 1 file changed, 2 
insertions(+), 12 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 31de6c6..a334dd3 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -150,18 +150,8 @@ def coding_to_coordinate(self, pos_m): position = pos_m["position"] offset = pos_m["offset"] - if region == "u": - noncoding_pos_m = { - "position": position - offset, - "offset": 0, - "region": "u" - } - elif region == "d": - noncoding_pos_m = { - "position": position + offset, - "offset": 0, - "region": "d" - } + if region in ["u", "d"]: + return self._noncoding.to_coordinate(pos_m) elif region == "": noncoding_pos_m = { "position": position + self._coding[0] -1, From 6cb7d3238e83b7285403d71c820c966d055498d5 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 20 Mar 2026 09:34:25 +0100 Subject: [PATCH 072/127] Cleanup --- mutalyzer_crossmapper/multi_locus.py | 32 +++++++--------------------- tests/test_multi_locus.py | 1 - 2 files changed, 8 insertions(+), 25 deletions(-) diff --git a/mutalyzer_crossmapper/multi_locus.py b/mutalyzer_crossmapper/multi_locus.py index 2853ee6..861e5cd 100644 --- a/mutalyzer_crossmapper/multi_locus.py +++ b/mutalyzer_crossmapper/multi_locus.py @@ -68,24 +68,10 @@ def to_position(self, coordinate:int): "region": region } - if location["offset"] == 0: - return { - "position": location["position"] + self._offsets[self._direction(index)], - "offset": 0, - "region": "" - } - - if location["offset"] < 0: - return { - "position": self._offsets[self._direction(index)], - "offset": location["offset"], - "region": "" - } - - return{ + return { "position": location["position"] + self._offsets[self._direction(index)], "offset": location["offset"], - "region": "" + "region": region } def to_coordinate(self, pos_m:dict): @@ -97,13 +83,11 @@ def to_coordinate(self, pos_m:dict): """ region = pos_m["region"] - if region == "u": - if self._inverted: - return abs(pos_m["position"]) + 
self._locations[-1][1] + pos_m["offset"] - 1 - return self._locations[0][0] - abs(pos_m["position"]) + pos_m["offset"] - - if region == "d": + if pos_m["region"] in ("u", "d"): + is_upstream = region == "u" if self._inverted: + is_upstream = not is_upstream + if is_upstream: return self._locations[0][0] - abs(pos_m["position"]) + pos_m["offset"] return abs(pos_m["position"]) + self._locations[-1][1] + pos_m["offset"] - 1 @@ -111,5 +95,5 @@ def to_coordinate(self, pos_m:dict): len(self._offsets), max(0, bisect_right(self._offsets, pos_m["position"]) - 1) ) - pos_m["position"] = pos_m["position"] - self._offsets[index] - return self._loci[self._direction(index)].to_coordinate(pos_m) + locus_pos_m = {**pos_m, "position": pos_m["position"] - self._offsets[index]} + return self._loci[self._direction(index)].to_coordinate(locus_pos_m) diff --git a/tests/test_multi_locus.py b/tests/test_multi_locus.py index c08ff7f..3434a62 100644 --- a/tests/test_multi_locus.py +++ b/tests/test_multi_locus.py @@ -321,6 +321,5 @@ def test_MultiLocus_inverted_degenerate(): [ {"position": 0, "offset": -1, "region": "d"}, {"position": 1, "offset": 0, "region": "d"}, - {"position": 2, "offset": 1, "region": "d"}, ], ) From 3e32bb96e258a0e6f02f5886bdebc5579f173300 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 20 Mar 2026 09:35:05 +0100 Subject: [PATCH 073/127] Cleanup and add tests --- mutalyzer_crossmapper/crossmapper.py | 189 +++++++++++++-------------- tests/test_crossmapper.py | 29 +++- 2 files changed, 119 insertions(+), 99 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index a334dd3..0fd0c09 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -52,9 +52,10 @@ def noncoding_to_coordinate(self, pos_m): :returns int: Coordinate. 
""" + multilocus_pos_m = {**pos_m} if pos_m["region"] == "": - pos_m["position"] = pos_m["position"] - 1 - return self._noncoding.to_coordinate(pos_m) + multilocus_pos_m["position"] = pos_m["position"] - 1 + return self._noncoding.to_coordinate(multilocus_pos_m) class Coding(NonCoding): @@ -74,13 +75,52 @@ def __init__(self, locations, cds, inverted=False): if self._inverted: self._coding = (b1["position"] + b1["offset"], b0["position"] + b0["offset"] + 1) - self._cds_len = (b0["position"] + b0["offset"]) - (b1["position"] + b1["offset"]) self._exons = (e1["position"], e0["position"]) else: self._coding = (b0["position"] + b0["offset"], b1["position"] + b1["offset"] +1) - self._cds_len = (b1["position"] + b1["offset"]) - (b0["position"] + b0["offset"]) self._exons = (e0["position"], e1["position"]) + def _degenerate_position(self, pos_m): + """Degenerate a coding position model (c./r.). + + :arg dict pos_m: Coding position model. + + :returns dict: a generate coding position model. + """ + region = pos_m["region"] + position = pos_m["position"] + + degenerated_pos_m = {"offset": pos_m["offset"]} + + if region == "u": + if self._inverted: + degenerated_pos_m["position"] = position + self._exons[1] - self._coding[1] + 1 + else: + degenerated_pos_m["position"] = position + self._coding[0] + degenerated_pos_m["region"] = "-" + if region == "d": + if self._inverted: + degenerated_pos_m["position"] = position + self._coding[0] + else: + degenerated_pos_m["position"] = position + self._exons[1]- self._coding[1] + 1 + degenerated_pos_m["region"] = "*" + return degenerated_pos_m + + def _normalize_position(self, pos_m): + """Normalize a coding position model (c./r.). + + :arg dict pos_m: Coding position model. + + :returns dict: a normalized coding postion model. 
+ """ + initial_pos = {**pos_m, "offset": 0} + coordinate = self._coding_to_coordinate(initial_pos) + if self._inverted: + coordinate = coordinate - pos_m["offset"] + else: + coordinate = coordinate + pos_m["offset"] + return self.coordinate_to_coding(coordinate) + def _coordinate_to_coding(self, coordinate): """Convert a coordinate to a coding position (c./r.). @@ -89,28 +129,29 @@ def _coordinate_to_coding(self, coordinate): :returns dict: Coding position model (c./r.). """ noncoding_pos_m = self._noncoding.to_position(coordinate) - location = noncoding_pos_m["position"] - if noncoding_pos_m["region"] == "": - if location < self._coding[0]: - return { - "position": self._coding[0] - location, - "offset": noncoding_pos_m["offset"], - "region": "-" - } - elif location >= self._coding[1]: - return { - "position": location - self._coding[1] + 1, - "offset": noncoding_pos_m["offset"], - "region": "*" - } + if noncoding_pos_m["region"] in ["u", "d"]: + return noncoding_pos_m + + location = noncoding_pos_m["position"] + offset = noncoding_pos_m["offset"] + if location < self._coding[0]: + return { + "position": self._coding[0] - location, + "offset": offset, + "region": "-" + } + if location >= self._coding[1]: return { - "position": location - self._coding[0] + 1, - "offset": noncoding_pos_m["offset"], - "region": "" + "position": location - self._coding[1] + 1, + "offset": offset, + "region": "*" } - else: - return noncoding_pos_m + return { + "position": location - self._coding[0] + 1, + "offset": offset, + "region": "" + } def coordinate_to_coding(self, coordinate, degenerate=False): """Convert a coordinate to a coding position (c./r.). @@ -121,71 +162,45 @@ def coordinate_to_coding(self, coordinate, degenerate=False): :returns dict: Coding position model (c./r.). 
""" pos_m = self._coordinate_to_coding(coordinate) - region = pos_m["region"] - position = pos_m["position"] - if degenerate: - if region == "u": - if self._inverted: - pos_m["position"] = position + self._exons[1] - self._coding[1] + 1 - else: - pos_m["position"] = position + self._coding[0] - pos_m["region"] = "-" - if region == "d": - if self._inverted: - pos_m["position"] = position + self._coding[0] - else: - pos_m["position"] = position + self._exons[1]- self._coding[1] + 1 - pos_m["region"] = "*" + if degenerate and pos_m["region"] in ("u", "d"): + pos_m = self._degenerate_position(pos_m) + return pos_m - def coding_to_coordinate(self, pos_m): + def _coding_to_coordinate(self, pos_m): """Convert a coding position (c./r.) to a coordinate. :arg dict pos_m: Coding position model (c./r.). :returns int: Coordinate. """ - region = pos_m["region"] position = pos_m["position"] - offset = pos_m["offset"] + region = pos_m["region"] if region in ["u", "d"]: return self._noncoding.to_coordinate(pos_m) - elif region == "": - noncoding_pos_m = { - "position": position + self._coding[0] -1, - "offset": offset, - "region": "" - } + + noncoding_pos_m = {"offset": pos_m["offset"], "region": ""} + if region == "": + noncoding_pos_m["position"] = position + self._coding[0] - 1 elif region == "-": - if position > self._coding[0]: # correct it to 'u' - noncoding_pos_m = { - "position": position - self._coding[0] - offset, - "offset": 0, - "region": "u" - } - else: - noncoding_pos_m = { - "position": self._coding[0] - position, - "offset": offset, - "region": "" - } - else: # * - if position > self._coding[0]: # correct it to 'd' - noncoding_pos_m = { - "position": position - self._coding[0] + offset, - "offset": 0, - "region": "d" - } - else: - noncoding_pos_m = { - "position": self._coding[1] + position - 1, - "offset": offset, - "region": "" - } + noncoding_pos_m["position"] = self._coding[0] - position + else: + noncoding_pos_m["position"] = self._coding[1] + position - 1 + 
return self._noncoding.to_coordinate(noncoding_pos_m) + def coding_to_coordinate(self, pos_m): + """Convert a coding position (c./r.) to a coordinate. + + :arg dict pos_m: Coding position model (c./r.). + + :returns int: Coordinate. + """ + normalized_pos_m = self._normalize_position(pos_m) + + return self._coding_to_coordinate(normalized_pos_m) def coordinate_to_protein(self, coordinate): """Convert a coordinate to a protein position (p.). @@ -198,35 +213,18 @@ def coordinate_to_protein(self, coordinate): if pos["region"] == "u": pos = self.coordinate_to_coding(coordinate + pos["position"]) - return { - "position": pos["position"] // 3 + 1, - "position_in_codon": pos["position"] % 3, - "region": "u", - **{k: v for k, v in pos.items() if k not in ["position", "region"]}} - - if pos["region"] == "d": + elif pos["region"] == "d": pos = self.coordinate_to_coding(coordinate - pos["position"]) - return { - "position": pos["position"] // 3 + 1, - "position_in_codon": pos["position"] % 3, - "region": "d", - **{k: v for k, v in pos.items() if k not in ["position", "region"]}} + position = pos["position"] if pos["region"] == "-": return { - "position": (pos["position"]+2) // 3, - "position_in_codon": -pos["position"] % 3 + 1, + "position": abs(-position // 3), + "position_in_codon": -position % 3 + 1, **{k: v for k, v in pos.items() if k != "position"}} - - if pos["region"] == "*": - return { - "position": (pos["position"]+2)// 3, - "position_in_codon": (pos["position"] + 2) % 3 + 1, - **{k: v for k, v in pos.items() if k != "position"}} - return { - "position": (pos["position"]+2) // 3, - "position_in_codon": (pos["position"]+2) % 3 + 1, + "position": (position + 2) // 3, + "position_in_codon": (position + 2) % 3 + 1, **{k: v for k, v in pos.items() if k != "position"}} def protein_to_coordinate(self, pos_m): @@ -241,6 +239,7 @@ def protein_to_coordinate(self, pos_m): {"position": 3 * pos_m["position"] - pos_m["position_in_codon"] + 1, "offset": pos_m["offset"], "region": 
pos_m["region"]}) + return self.coding_to_coordinate( {"position": 3 * pos_m["position"] + pos_m["position_in_codon"] - 3, "offset": pos_m["offset"], diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index 55a9a92..a3bc5f7 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -105,8 +105,8 @@ def test_NonCoding_degenerate(): crossmap.noncoding_to_coordinate, 4, [ - {"position": 1, "offset": 0, "region": "u"}, {"position": 1, "offset": -1, "region": ""}, + {"position": 1, "offset": 0, "region": "u"}, ], ) @@ -116,6 +116,7 @@ def test_NonCoding_degenerate(): 72, [ {"position": 1, "offset": 0, "region": "d"}, + {"position": 22, "offset": 1, "region": ""}, {"position": 23, "offset": 0, "region": ""}, ], ) @@ -131,7 +132,8 @@ def test_NonCoding_inverted_degenerate(): 72, [ {"position": 1, "offset": -1, "region": ""}, - {"position": 1, "offset": 0, "region": "u"},], + {"position": 1, "offset": 0, "region": "u"}, + ], ) # Boundary between downstream and transcript. 
@@ -140,7 +142,9 @@ def test_NonCoding_inverted_degenerate(): 4, [ {"position": 1, "offset": 0, "region": "d"}, - {"position": 22, "offset": 1, "region": ""},], + {"position": 23, "offset": 0, "region": ""}, + {"position": 22, "offset": 1, "region": ""}, + ], ) @@ -464,16 +468,21 @@ def test_Coding_degenerate(): {"position": 2, "offset": 0, "region": "-"}, {"position": 1, "offset": -2, "region": ""}, {"position": 1, "offset": -10, "region": "*"}, + {"position": 2, "offset": -11, "region": "*"}, + {"position": 3, "offset": 1, "region": "-"}, + {"position": 4, "offset": 2, "region": "-"}, ], ) degenerate_equal( crossmap.coding_to_coordinate, 20, [ - {"position": 1, "offset": 1, "region": "*"}, + {"position": 1, "offset": 0, "region": "d"}, {"position": 2, "offset": 0, "region": "*"}, {"position": 8, "offset": 2, "region": ""}, {"position": 1, "offset": 10, "region": "-"}, + {"position": 2, "offset": 11, "region": "-"}, + {"position": 7, "offset": 3, "region": ""}, ], ) @@ -490,6 +499,8 @@ def test_Coding_inverted_degenerate(): {"position": 2, "offset": 0, "region": "-"}, {"position": 1, "offset": -2, "region": ""}, {"position": 1, "offset": -10, "region": "*"}, + {"position": 1, "offset": -11, "region": "d"}, + {"position": 2, "offset": -3, "region": ""}, ], ) degenerate_equal( @@ -500,6 +511,8 @@ def test_Coding_inverted_degenerate(): {"position": 2, "offset": 0, "region": "*"}, {"position": 8, "offset": 2, "region": ""}, {"position": 1, "offset": 10, "region": "-"}, + {"position": 1, "offset": 11, "region": "u"}, + {"position": 2, "offset": 12, "region": "u"}, ], ) @@ -563,6 +576,8 @@ def test_Coding_no_utr_degenerate(): {"position": 1, "offset": 0, "region": "u"}, {"position": 1, "offset": 0, "region": "-"}, {"position": 1, "offset": -2, "region": "*"}, + {"position": 1, "offset": -1, "region": ""}, + {"position": 1, "offset": -2, "region": "d"}, ], ) degenerate_equal( @@ -572,6 +587,8 @@ def test_Coding_no_utr_degenerate(): {"position": 1, "offset": 0, "region": 
"d"}, {"position": 1, "offset": 0, "region": "*"}, {"position": 1, "offset": 2, "region": "-"}, + {"position": 1, "offset": 1, "region": ""}, + {"position": 1, "offset": 2, "region": "u"}, ], ) @@ -587,6 +604,8 @@ def test_Coding_inverted_no_utr_degenerate(): {"position": 1, "offset": 0, "region": "u"}, {"position": 1, "offset": 0, "region": "-"}, {"position": 1, "offset": -2, "region": "*"}, + {"position": 1, "offset": -1, "region": ""}, + {"position": 1, "offset": -2, "region": "d"}, ], ) degenerate_equal( @@ -596,6 +615,8 @@ def test_Coding_inverted_no_utr_degenerate(): {"position": 1, "offset": 0, "region": "d"}, {"position": 1, "offset": 0, "region": "*"}, {"position": 1, "offset": 2, "region": "-"}, + {"position": 1, "offset": 1, "region": ""}, + {"position": 1, "offset": 2, "region": "u"}, ], ) From 186e2a69c0029e73b0e5a0b5f171cfa09dd05686 Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Fri, 20 Mar 2026 09:52:55 +0100 Subject: [PATCH 074/127] Update tables in documentation --- README.rst | 70 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/README.rst b/README.rst index 29f15d1..cc007b2 100644 --- a/README.rst +++ b/README.rst @@ -129,13 +129,14 @@ Here is the mapping of coordinates to noncoding positions: :class: table-scroll :header: "Coordinate", "Position", "Offset", "Region", "HGVS" - "4", "1","0", "'u'", "n.u1" - "5", "1","0", "''", "n.1" - "24", "9","5", "''", "n.9+5" - "25", "10","-5", "''", "n.10-5" - "71", "22","0", "''", "n.22" - "72", "1","0", "'d'", "n.d1" - "79", "8","0", "'d'", "n.d8" + "0", "5", "0", "u", "c.u5" + "4", "1", "0", "u", "n.u1" + "5", "1", "0", "", "n.1" + "24", "9", "5", "", "n.9+5" + "25", "10", "-5", "", "n.10-5" + "71", "22", "0", "", "n.22" + "72", "1", "0", "d", "n.d1" + "79", "8", "0", "d", "n.d8" @@ -154,7 +155,7 @@ Coding positions follow the HGVS ``c`` coordinate system. 
They are represented a { "position": 1, "offset": 3, - "region": '*' + "region": "*" } Where: @@ -186,19 +187,20 @@ Here is the mapping of coordinates to coding positions: :class: table-scroll :header: "Coordinate", "Position", "Offset", "Region", "HGVS" - "4", "1","0", "'u'", "c.u1" - "5", "11","0", '-', "c.-11" - "24", "3","5", '-', "c.-3+5" - "25", "2","-5", '-', "c.-2-5" - "31", "1","0", '-', "c.-1" - "32", "1","0", "''", "c.1" - "37", "3","3", "''", "c.3+3" - "38", "4","-2", "''", "c.4-2" - "43", "1","0", '*', "c.*1" - "60", "3","9", '*', "c.*3+9" - "61", "4","-9", '*', "c.*4+9" - "71", "5","0", '*', "c.*5" - "79", "8","0", "'d'", "c.d8" + "0", "5", "0", "u", "c.u5" + "4", "1", "0", "u", "c.u1" + "5", "11", "0", "\-", "c.-11" + "24", "3", "5", "\-", "c.-3+5" + "25", "2", "-5", "\-", "c.-2-5" + "31", "1", "0", "\-", "c.-1" + "32", "1", "0", "", "c.1" + "37", "3", "3", "", "c.3+3" + "38", "4", "-2", "", "c.4-2" + "43", "1", "0", "\*", "c.*1" + "60", "3", "9", "\*", "c.*3+9" + "61", "4", "-9", "\*", "c.*4+9" + "71", "5", "0", "\*", "c.*5" + "79", "8", "0", "d", "c.d8" @@ -244,19 +246,19 @@ Here is the mapping of coordinates to protein positions: .. 
csv-table:: :class: table-scroll - :header: "Coordinate", "Position", "position_in_codon", "Offset", "Region", "HGVS" - - "0", "4", "2", "0", "'u'", - "4", "4", "2", "0", "'u'", - "5", "4", "2", "0", '-', - "6", "4", "3", "0", '-', - "7", "3", "1", "0", '-', - "31", "1", "3", "0", '-', - "32", "1", "1", "0", "''", "p.1" - "42", "2", "3", "0", "''", "p.2" - "43", "1", "1", "0", '*', - "44", "1", "1", "1", '*', - "79", "2", "2", "0", "'d'", + :header: "Coordinate", "Position", "position_in_codon", "Offset", "Region", "HGVS" + + "0", "4", "2", "0", "u", + "4", "4", "2", "0", "u", + "5", "4", "2", "0", "\-", + "6", "4", "3", "0", "\-", + "7", "3", "1", "0", "\-", + "31", "1", "3", "0", "\-", + "32", "1", "1", "0", "", "p.1" + "42", "2", "3", "0", "", "p.2" + "43", "1", "1", "0", "\*", + "44", "1", "1", "1", "\*", + "79", "2", "2", "0", "d", From 617b126d548c540d369d36ceea919474ec1ed2dc Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Fri, 20 Mar 2026 09:54:28 +0100 Subject: [PATCH 075/127] Update README.rst --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index cc007b2..79e9c48 100644 --- a/README.rst +++ b/README.rst @@ -115,7 +115,7 @@ NonCoding Position Conversion >>> crossmap = NonCoding(_exons) >>> crossmap.coordinate_to_noncoding(25) {"position": 10, "offset": -5, "region": ''} - >>> crossmap.noncoding_to_coordinate({"position": 10, "offset": -5, "region": ''}) + >>> crossmap.noncoding_to_coordinate({"position": 10, "offset": -5, "region": ""}) 25 Notes @@ -173,7 +173,7 @@ Coding Position Conversion >>> crossmap = Coding(_exons, _cds) >>> crossmap.coordinate_to_coding(46) {"position": 1, "offset": 3, "region": '*'} - >>> crossmap.coding_to_coordinate({"position": 1, "offset": 3, "region": '*'}) + >>> crossmap.coding_to_coordinate({"position": 1, "offset": 3, "region": "*"}) 46 Notes From 4fddbc316100af52b75e570b6de9aa15f89569db Mon Sep 17 00:00:00 2001 From: 
Xiaoyun Liu Date: Fri, 20 Mar 2026 10:47:40 +0100 Subject: [PATCH 076/127] Use single quate for dictionary --- mutalyzer_crossmapper/crossmapper.py | 130 +++++++++++++-------------- 1 file changed, 65 insertions(+), 65 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 0fd0c09..3994271 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -10,7 +10,7 @@ def coordinate_to_genomic(self, coordinate): :returns dict: Genomic position model. """ - return {"position": coordinate + 1} + return {'position': coordinate + 1} def genomic_to_coordinate(self, pos_m): """Convert a genomic position (g./m./o.) to a coordinate. @@ -19,7 +19,7 @@ def genomic_to_coordinate(self, pos_m): :returns int: Coordinate. """ - return pos_m["position"] - 1 + return pos_m['position'] - 1 class NonCoding(Genomic): @@ -41,8 +41,8 @@ def coordinate_to_noncoding(self, coordinate): :returns dict: Noncoding position model. """ pos_m = self._noncoding.to_position(coordinate) - if pos_m["region"] == "": - pos_m["position"] = pos_m["position"] + 1 + if pos_m['region'] == '': + pos_m['position'] = pos_m['position'] + 1 return pos_m def noncoding_to_coordinate(self, pos_m): @@ -53,8 +53,8 @@ def noncoding_to_coordinate(self, pos_m): :returns int: Coordinate. 
""" multilocus_pos_m = {**pos_m} - if pos_m["region"] == "": - multilocus_pos_m["position"] = pos_m["position"] - 1 + if pos_m['region'] == '': + multilocus_pos_m['position'] = pos_m['position'] - 1 return self._noncoding.to_coordinate(multilocus_pos_m) @@ -74,11 +74,11 @@ def __init__(self, locations, cds, inverted=False): e1 = self._noncoding.to_position(locations[-1][1]-1) if self._inverted: - self._coding = (b1["position"] + b1["offset"], b0["position"] + b0["offset"] + 1) - self._exons = (e1["position"], e0["position"]) + self._coding = (b1['position'] + b1['offset'], b0['position'] + b0['offset'] + 1) + self._exons = (e1['position'], e0['position']) else: - self._coding = (b0["position"] + b0["offset"], b1["position"] + b1["offset"] +1) - self._exons = (e0["position"], e1["position"]) + self._coding = (b0['position'] + b0['offset'], b1['position'] + b1['offset'] +1) + self._exons = (e0['position'], e1['position']) def _degenerate_position(self, pos_m): """Degenerate a coding position model (c./r.). @@ -87,23 +87,23 @@ def _degenerate_position(self, pos_m): :returns dict: a generate coding position model. 
""" - region = pos_m["region"] - position = pos_m["position"] + region = pos_m['region'] + position = pos_m['position'] - degenerated_pos_m = {"offset": pos_m["offset"]} + degenerated_pos_m = {'offset': pos_m['offset']} - if region == "u": + if region == 'u': if self._inverted: - degenerated_pos_m["position"] = position + self._exons[1] - self._coding[1] + 1 + degenerated_pos_m['position'] = position + self._exons[1] - self._coding[1] + 1 else: - degenerated_pos_m["position"] = position + self._coding[0] - degenerated_pos_m["region"] = "-" - if region == "d": + degenerated_pos_m['position'] = position + self._coding[0] + degenerated_pos_m['region'] = '-' + if region == 'd': if self._inverted: - degenerated_pos_m["position"] = position + self._coding[0] + degenerated_pos_m['position'] = position + self._coding[0] else: - degenerated_pos_m["position"] = position + self._exons[1]- self._coding[1] + 1 - degenerated_pos_m["region"] = "*" + degenerated_pos_m['position'] = position + self._exons[1]- self._coding[1] + 1 + degenerated_pos_m['region'] = '*' return degenerated_pos_m def _normalize_position(self, pos_m): @@ -113,12 +113,12 @@ def _normalize_position(self, pos_m): :returns dict: a normalized coding postion model. 
""" - initial_pos = {**pos_m, "offset": 0} + initial_pos = {**pos_m, 'offset': 0} coordinate = self._coding_to_coordinate(initial_pos) if self._inverted: - coordinate = coordinate - pos_m["offset"] + coordinate = coordinate - pos_m['offset'] else: - coordinate = coordinate + pos_m["offset"] + coordinate = coordinate + pos_m['offset'] return self.coordinate_to_coding(coordinate) def _coordinate_to_coding(self, coordinate): @@ -130,27 +130,27 @@ def _coordinate_to_coding(self, coordinate): """ noncoding_pos_m = self._noncoding.to_position(coordinate) - if noncoding_pos_m["region"] in ["u", "d"]: + if noncoding_pos_m['region'] in ['u', 'd']: return noncoding_pos_m - location = noncoding_pos_m["position"] - offset = noncoding_pos_m["offset"] + location = noncoding_pos_m['position'] + offset = noncoding_pos_m['offset'] if location < self._coding[0]: return { - "position": self._coding[0] - location, - "offset": offset, - "region": "-" + 'position': self._coding[0] - location, + 'offset': offset, + 'region': '-' } if location >= self._coding[1]: return { - "position": location - self._coding[1] + 1, - "offset": offset, - "region": "*" + 'position': location - self._coding[1] + 1, + 'offset': offset, + 'region': '*' } return { - "position": location - self._coding[0] + 1, - "offset": offset, - "region": "" + 'position': location - self._coding[0] + 1, + 'offset': offset, + 'region': '' } def coordinate_to_coding(self, coordinate, degenerate=False): @@ -163,7 +163,7 @@ def coordinate_to_coding(self, coordinate, degenerate=False): """ pos_m = self._coordinate_to_coding(coordinate) - if degenerate and pos_m["region"] in ("u", "d"): + if degenerate and pos_m['region'] in ('u', 'd'): pos_m = self._degenerate_position(pos_m) return pos_m @@ -175,19 +175,19 @@ def _coding_to_coordinate(self, pos_m): :returns int: Coordinate. 
""" - position = pos_m["position"] - region = pos_m["region"] + position = pos_m['position'] + region = pos_m['region'] - if region in ["u", "d"]: + if region in ['u', 'd']: return self._noncoding.to_coordinate(pos_m) - noncoding_pos_m = {"offset": pos_m["offset"], "region": ""} - if region == "": - noncoding_pos_m["position"] = position + self._coding[0] - 1 - elif region == "-": - noncoding_pos_m["position"] = self._coding[0] - position + noncoding_pos_m = {'offset': pos_m['offset'], 'region': ''} + if region == '': + noncoding_pos_m['position'] = position + self._coding[0] - 1 + elif region == '-': + noncoding_pos_m['position'] = self._coding[0] - position else: - noncoding_pos_m["position"] = self._coding[1] + position - 1 + noncoding_pos_m['position'] = self._coding[1] + position - 1 return self._noncoding.to_coordinate(noncoding_pos_m) @@ -211,21 +211,21 @@ def coordinate_to_protein(self, coordinate): """ pos = self.coordinate_to_coding(coordinate) - if pos["region"] == "u": - pos = self.coordinate_to_coding(coordinate + pos["position"]) - elif pos["region"] == "d": - pos = self.coordinate_to_coding(coordinate - pos["position"]) + if pos['region'] == 'u': + pos = self.coordinate_to_coding(coordinate + pos['position']) + elif pos['region'] == 'd': + pos = self.coordinate_to_coding(coordinate - pos['position']) - position = pos["position"] - if pos["region"] == "-": + position = pos['position'] + if pos['region'] == '-': return { - "position": abs(-position // 3), - "position_in_codon": -position % 3 + 1, - **{k: v for k, v in pos.items() if k != "position"}} + 'position': abs(-position // 3), + 'position_in_codon': -position % 3 + 1, + **{k: v for k, v in pos.items() if k != 'position'}} return { - "position": (position + 2) // 3, - "position_in_codon": (position + 2) % 3 + 1, - **{k: v for k, v in pos.items() if k != "position"}} + 'position': (position + 2) // 3, + 'position_in_codon': (position + 2) % 3 + 1, + **{k: v for k, v in pos.items() if k != 
'position'}} def protein_to_coordinate(self, pos_m): """Convert a protein position (p.) to a coordinate. @@ -234,13 +234,13 @@ def protein_to_coordinate(self, pos_m): :returns int: Coordinate. """ - if pos_m["region"] == "-": + if pos_m['region'] == '-': return self.coding_to_coordinate( - {"position": 3 * pos_m["position"] - pos_m["position_in_codon"] + 1, - "offset": pos_m["offset"], - "region": pos_m["region"]}) + {'position': 3 * pos_m['position'] - pos_m['position_in_codon'] + 1, + 'offset': pos_m['offset'], + 'region': pos_m['region']}) return self.coding_to_coordinate( - {"position": 3 * pos_m["position"] + pos_m["position_in_codon"] - 3, - "offset": pos_m["offset"], - "region": pos_m["region"]}) + {'position': 3 * pos_m['position'] + pos_m['position_in_codon'] - 3, + 'offset': pos_m['offset'], + 'region': pos_m['region']}) From f85b25f5a73e588fd07d887ac8e2082b6161c206 Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Fri, 20 Mar 2026 10:59:20 +0100 Subject: [PATCH 077/127] Delete table --- README.rst | 251 ++++++++++------------------------------------------- 1 file changed, 45 insertions(+), 206 deletions(-) diff --git a/README.rst b/README.rst index 79e9c48..d398402 100644 --- a/README.rst +++ b/README.rst @@ -27,239 +27,78 @@ HGVS position crossmapper This library provides an interface to convert (cross map) between different HGVS numbering_ systems. -Converting between the transcript oriented c. or n. and the genomic oriented g. +Converting between the transcript oriented ``c.`` or ``n.`` and the genomic oriented ``g.`` numbering systems can be difficult, especially when the transcript in question -resides on the complement strand. +resides on the complement strand. This library provides functions to convert between any HGVS +numbering system to standard (0-based) coordinates and vice versa. **Features:** -- Support for genomic positions to standard coordinates and vice versa. 
-- Support for noncoding positions to standard coordinates and vice versa. -- Support for coding positions to standard coordinates and vice versa. -- Support for protein positions to standard coordinates and vice versa. -- Basic classes for loci that can be used for genomic loci other than genes. +- Support for genomic (``g.``, ``m.``, ``o.``) positions to standard coordinates and vice versa. +- Support for noncoding (``n.``, ``r.``) positions to standard coordinates and vice versa. +- Support for coding (``c.``, ``r.``) positions to standard coordinates and vice versa. +- Support for protein (``p.``) positions to standard coordinates and vice versa. +- Basic classes that can be used for loci other than genes or transcripts. Please see ReadTheDocs_ for the latest documentation. -Quick Start -=========== +Quick start +----------- -An example below uses the following transcript data: +The ``Genomic`` class provides an interface to conversions between genomic +positions and coordinates. -.. code-block:: python - - >>>_exons = [(5, 8), (14, 20), (30, 35), (40, 44), (50, 52), (70, 72)] - >>>_cds = (32, 43) - - -Genomic Class -------------- - -The ``Genomic`` class provides an interface for conversions between genomic positions and coordinates. - -Genomic Position Model -~~~~~~~~~~~~~~~~~~~~~~~ - -Genomic positions follow the HGVS ``g`` coordinate system. They are represented as dictionaries. Below is an example of `g.1` in HGVS. - -.. code-block:: json - - {"position": 1} - -Where: - -- **position**: a positive integer(>0) - -Genomic Position Conversion -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: python +.. 
code:: python >>> from mutalyzer_crossmapper import Genomic >>> crossmap = Genomic() >>> crossmap.coordinate_to_genomic(0) - {"position": 1} - >>> crossmap.genomic_to_coordinate({"position": 1}) + 1 + >>> crossmap.genomic_to_coordinate({'position': 1}) 0 -NonCoding Class ---------------- - -The ``NonCoding`` class provides conversions between noncoding positions and coordinates. - -NonCoding Position Model -~~~~~~~~~~~~~~~~~~~~~~~ +On top of the functionality provided by the ``Genomic`` class, the +``NonCoding`` class provides an interface to conversions between noncoding +positions and coordinates. -Noncoding positions follow the HGVS ``n`` coordinate system. They are represented as dictionaries. Below is an example of ``n.14+1`` in HGVS. - -.. code-block:: json - - { - "position": 10, - "offset": -5, - "region": "" - } - -Where: - -- **position**: a positive integer (>0) -- **offset**: an integer indicating the offset relative to the position (negative for upstream, positive for downstream) -- **region**: a string describing the region type (``''`` for standard, ``'u'`` for upstream, ``'d'`` for downstream) - -NonCoding Position Conversion -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: python +.. code:: python >>> from mutalyzer_crossmapper import NonCoding - >>> crossmap = NonCoding(_exons) - >>> crossmap.coordinate_to_noncoding(25) - {"position": 10, "offset": -5, "region": ''} - >>> crossmap.noncoding_to_coordinate({"position": 10, "offset": -5, "region": ""}) - 25 - -Notes -~~~~~ - -- Add the flag ``inverted=True`` to the constructor when the transcript resides on the reverse complement strand. - -Here is the mapping of coordinates to noncoding positions: - -.. 
csv-table:: - :class: table-scroll - :header: "Coordinate", "Position", "Offset", "Region", "HGVS" - - "0", "5", "0", "u", "c.u5" - "4", "1", "0", "u", "n.u1" - "5", "1", "0", "", "n.1" - "24", "9", "5", "", "n.9+5" - "25", "10", "-5", "", "n.10-5" - "71", "22", "0", "", "n.22" - "72", "1", "0", "d", "n.d1" - "79", "8", "0", "d", "n.d8" - - + >>> exons = [(5, 8), (14, 20), (30, 35), (40, 44), (50, 52), (70, 72)] + >>> crossmap = NonCoding(exons) + >>> crossmap.coordinate_to_noncoding(35) + {'position':14, 'offset':1, 'region':''} + >>> crossmap.noncoding_to_coordinate({'position':14, 'offset':1, 'region':''}) + 35 -Coding Class ------------- +Add the flag ``inverted=True`` to the constructor when the transcript resides +on the reverse complement strand. -The ``Coding`` class provides conversions between coding positions and coordinates, as well as protein positions. +On top of the functionality provided by the ``NonCoding`` class, the ``Coding`` +class provides an interface to conversions between coding positions and +coordinates as well as conversions between protein positions and coordinates. -Coding Position Model -~~~~~~~~~~~~~~~~~~~~ - -Coding positions follow the HGVS ``c`` coordinate system. They are represented as dictionaries. Here is an example of ``c.*1+3``. - -.. code-block:: json - - { - "position": 1, - "offset": 3, - "region": "*" - } - -Where: - -- **position**: a positive integer -- **offset**: an integer indicating the offset relative to the position -- **region**: a string describing the region type (`""` for standard coding positions, `'-'` for 5' UTR, `'*'` for 3' UTR, `'u'` for upstream and ``"d"`` for downstream) - -Coding Position Conversion -~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: python +.. 
code:: python >>> from mutalyzer_crossmapper import Coding - >>> crossmap = Coding(_exons, _cds) - >>> crossmap.coordinate_to_coding(46) - {"position": 1, "offset": 3, "region": '*'} - >>> crossmap.coding_to_coordinate({"position": 1, "offset": 3, "region": "*"}) - 46 - -Notes -~~~~~ - -- The flag ``inverted=True`` can be used for transcripts on the reverse complement strand. - -Here is the mapping of coordinates to coding positions: - -.. csv-table:: - :class: table-scroll - :header: "Coordinate", "Position", "Offset", "Region", "HGVS" - - "0", "5", "0", "u", "c.u5" - "4", "1", "0", "u", "c.u1" - "5", "11", "0", "\-", "c.-11" - "24", "3", "5", "\-", "c.-3+5" - "25", "2", "-5", "\-", "c.-2-5" - "31", "1", "0", "\-", "c.-1" - "32", "1", "0", "", "c.1" - "37", "3", "3", "", "c.3+3" - "38", "4", "-2", "", "c.4-2" - "43", "1", "0", "\*", "c.*1" - "60", "3", "9", "\*", "c.*3+9" - "61", "4", "-9", "\*", "c.*4+9" - "71", "5", "0", "\*", "c.*5" - "79", "8", "0", "d", "c.d8" - - - - -Protein -------- - -Protein Position Model -~~~~~~~~~~~~~~~~~~~~~~ - -Protein positions follow the HGVS ``p`` coordinate system. They are represented as dictionaries. Here is an example of ``p.1`` in HGVS. - -.. code-block:: json - - { - "position": 1, - "position_in_codon": 3, - "offset": 3, - "region": "" - } - -Where: - -- **position**: the amino acid position (1-based) -- **position_in_codon**: the codon nucleotide index (1, 2, or 3) -- **offset**: an integer indicating offset relative to the codon -- **region**: a string describing the region type (``''`` for standard positions) - -Protein Position Conversion -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Conversions between protein positions and coordinates: - -.. 
code-block:: python - - >>> crossmap.coordinate_to_protein(37) - {"position": 1, "position_in_codon": 3, "offset": 3, "region": ""} - >>> crossmap.protein_to_coordinate({"position": 1, "position_in_codon": 3, "offset": 3, "region": ""}) - 37 - + >>> cds = (32, 43) + >>> crossmap = Coding(exons, cds) + >>> crossmap.coordinate_to_coding(31) + {'position':1, 'offset':0, 'region':'-'} + >>> crossmap.coding_to_coordinate({'position':1, 'offset':0, 'region':'-'}) + 31 -Here is the mapping of coordinates to protein positions: +Again, the flag ``inverted=True`` can be used for transcripts that reside on +the reverse complement strand. -.. csv-table:: - :class: table-scroll - :header: "Coordinate", "Position", "position_in_codon", "Offset", "Region", "HGVS" +Conversions between protein positions and coordinates are done as follows. - "0", "4", "2", "0", "u", - "4", "4", "2", "0", "u", - "5", "4", "2", "0", "\-", - "6", "4", "3", "0", "\-", - "7", "3", "1", "0", "\-", - "31", "1", "3", "0", "\-", - "32", "1", "1", "0", "", "p.1" - "42", "2", "3", "0", "", "p.2" - "43", "1", "1", "0", "\*", - "44", "1", "1", "1", "\*", - "79", "2", "2", "0", "d", +.. code:: python + >>> crossmap.coordinate_to_protein(41) + {'position':2, 'position_in_codon': 2, 'offset':0, 'region':''} + >>> crossmap.protein_to_coordinate({'position':2, 'position_in_codon': 2, 'offset':0, 'region':''}) + 41 From ca4379ef865138c0d66a9436f102497b1f91dbf5 Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Fri, 20 Mar 2026 11:32:56 +0100 Subject: [PATCH 078/127] Update README.rst --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index d398402..fd0308c 100644 --- a/README.rst +++ b/README.rst @@ -54,7 +54,7 @@ positions and coordinates. 
>>> crossmap = Genomic() >>> crossmap.coordinate_to_genomic(0) 1 - >>> crossmap.genomic_to_coordinate({'position': 1}) + >>> crossmap.genomic_to_coordinate({'position':1}) 0 On top of the functionality provided by the ``Genomic`` class, the @@ -97,7 +97,7 @@ Conversions between protein positions and coordinates are done as follows. >>> crossmap.coordinate_to_protein(41) {'position':2, 'position_in_codon': 2, 'offset':0, 'region':''} - >>> crossmap.protein_to_coordinate({'position':2, 'position_in_codon': 2, 'offset':0, 'region':''}) + >>> crossmap.protein_to_coordinate({'position':2, 'position_in_codon':2, 'offset':0, 'region':''}) 41 From 072719d7dd1a1c27b7be8adb1e234dca56d7cc78 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 20 Mar 2026 13:32:07 +0100 Subject: [PATCH 079/127] Update library, replace tuple with dictionary --- docs/library.rst | 308 ++++++++++++++++++++++++++--------------------- 1 file changed, 173 insertions(+), 135 deletions(-) diff --git a/docs/library.rst b/docs/library.rst index 6ef095a..032b5c8 100644 --- a/docs/library.rst +++ b/docs/library.rst @@ -8,7 +8,24 @@ The ``Genomic`` class --------------------- The ``Genomic`` class provides an interface to conversions between genomic -positions and coordinates. +(``g.``, ``m``, ``n``) positions and coordinates. + +Genomic Position Model +~~~~~~~~~~~~~~~~~~~~~~~ + +Genomic positions follow the HGVS genomic coordinate system. +They are represented as 1-key dictionaries. Below is an example of `g.1` in HGVS. + +.. code-block:: json + + {'position':1} + +Where: + +- **position**: a positive integer(>0) + +Genomic Position Conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: python @@ -22,7 +39,7 @@ used to convert to and from genomic positions. >>> crossmap.coordinate_to_genomic(0) 1 - >>> crossmap.genomic_to_coordinate(1) + >>> crossmap.genomic_to_coordinate({'position':1}) 0 See section :doc:`api/crossmap` for a detailed description. 
@@ -32,8 +49,33 @@ The ``NonCoding`` class On top of the functionality provided by the ``Genomic`` class, the ``NonCoding`` class provides an interface to conversions between noncoding -positions and coordinates. Conversions between positioning systems should be -done via a coordinate. +(``n.``, ``r.``) positions and coordinates. Conversions between positioning +systems should be done via a coordinate. + +NonCoding Position Model +~~~~~~~~~~~~~~~~~~~~~~~~ + +Noncoding positions follow the HGVS ``n`` coordinate system. They are represented +as 3-key dictionaries. Below is an example of ``n.14+1`` in HGVS. + +.. code-block:: json + + { + 'position': 14, + 'offset': 1, + 'region': '' + } + +Where: + +- **position**: an interger representing a transcript position (>0) +- **offset**: an integer indicating the offset relative to the position (negative for upstream, + positive for downstream) +- **region**: a string describing the region type (``''`` for standard, ``'u'`` for upstream, + ``'d'`` for downstream) + +NonCoding Position Conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: python @@ -41,22 +83,8 @@ done via a coordinate. >>> exons = [(5, 8), (14, 20), (30, 35), (40, 44), (50, 52), (70, 72)] >>> crossmap = NonCoding(exons) -Now the functions ``coordinate_to_noncoding()`` and -``noncoding_to_coordinate()`` can be used. These functions use a 3-tuple to -represent a noncoding position. - -.. _table_noncoding: -.. list-table:: Noncoding positions. - :header-rows: 1 - - * - index - - description - * - 0 - - Transcript position. - * - 1 - - Offset. - * - 2 - - Upstream or downstream offset. +Now the functions ``coordinate_to_noncoding()`` and ``noncoding_to_coordinate()`` +can be used. In our example, the HGVS position "g.36" (coordinate ``35``) is equivalent to position "n.14+1". We can convert between these two as follows. @@ -64,25 +92,24 @@ position "n.14+1". We can convert between these two as follows. .. 
code:: python >>> crossmap.coordinate_to_noncoding(35) - (14, 1, 0) + {'position':14, 'offset':1, 'region':''} + >>> crossmap.noncoding_to_coordinate({'position':14, 'offset':1, 'region':''}) + {'position':14, 'offset':1, 'region':''} -When the coordinate is upstream or downstream of the transcript, the last -element of the tuple denotes the offset with respect to the transcript. This -makes it possible to distinguish between intronic positions and those outside -of the transcript. +When the coordinate is upstream or downstream of the transcript, we use ``'u`` to +present upstream and ``'d'`` to present downstream. .. code:: python >>> crossmap.coordinate_to_noncoding(2) - (1, -3, -3) + {'position':3, 'offset':0, 'region':'u'} + >>> crossmap.noncoding_to_coordinate({'position':3, 'offset':0, 'region':'u'}) + 2 >>> crossmap.coordinate_to_noncoding(73) - (22, 2, 2) + {'position':2, 'offset':0, 'region':'d'} + >>> crossmap.noncoding_to_coordinate({'position':2, 'offset':0, 'region':'d'}) + 73 -Note that this last element is optional (and ignored) when a conversion to a -coordinate is requested. - - >>> crossmap.noncoding_to_coordinate((14, 1)) - 35 For transcripts that reside on the reverse complement strand, the ``inverted`` parameter should be set to ``True``. In our example, HGVS position "g.36" @@ -92,18 +119,55 @@ parameter should be set to ``True``. In our example, HGVS position "g.36" >>> crossmap = NonCoding(exons, inverted=True) >>> crossmap.coordinate_to_noncoding(35) - (9, -1, 0) - >>> crossmap.noncoding_to_coordinate((9, -1)) + {'position':9, 'offset':-1, 'region':''} + >>> crossmap.noncoding_to_coordinate({'position':9, 'offset':-1, 'region':''}) 35 +In the following table, we show a number of annotated examples. +.. 
csv-table:: + :class: table-scroll + :header: "Coordinate", "Position", "Offset", "Region", "HGVS" + + "0", "5", "0", `u`, `n.u5` + "4", "1", "0", `u`, `n.u1` + "5", "1", "0", `""`, `n.1` + "24", "9", "5", `""`, `n.9+5` + "25", "10", "-5", `""`, `n.10-5` + "71", "22", "0", `""`, `n.22` + "72", "1", "0", `d`, `n.d1` + "79", "8", "0", `d`, `n.d8` + See section :doc:`api/crossmap` for a detailed description. The ``Coding`` class -------------------- The ``Coding`` class provides an interface to all conversions between -positioning systems and coordinates. Conversions between positioning systems -should be done via a coordinate. +coding (``c.``, ``r.``) rpositioning systems and coordinates. Conversions between +positioning systems should be done via a coordinate. + +Coding Position Model +~~~~~~~~~~~~~~~~~~~~~ +Coding positions follow the HGVS ``c`` coordinate system. They are +represented as 3-key dictionaries. Here is an example of ``c.*1+3``. + +.. code-block:: json + + { + 'position': 1, + 'offset': 3, + 'region': '*' + } + +Where: + +- **position**: an interger representing a transcript position (>0) +- **offset**: an integer indicating the offset relative to the position +- **region**: a string describing the region type (`''` for standard coding positions, + `'-'` for 5' UTR, `'*'` for 3' UTR, `'u'` for upstream and ``'d'`` for downstream) + +Coding Position Conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: python @@ -114,40 +178,7 @@ should be done via a coordinate. On top of the functionality provided by the ``NonCoding`` class, the functions ``coordinate_to_coding()`` and ``coding_to_coordinate()`` can be used. These -functions use a 4-tuple to represent a coding position. - -.. list-table:: Coding positions. - :header-rows: 1 - - * - index - - description - * - 0 - - Transcript position. - * - 1 - - Offset. - * - 2 - - Region. - * - 3 - - Upstream or downstream offset. - -The region denotes the location of the position with respect to the CDS. 
This -is needed in order to work with the HGVS "-" and "*" positions. - -.. list-table:: Coding position regions. - :header-rows: 1 - - * - value - - description - - HGVS example - * - ``-1`` - - Upstream of the CDS. - - "c.-10" - * - ``0`` - - In the CDS. - - "c.1" - * - ``1`` - - Downstream of the CDS. - - "c.*10" +functions use a 3-key dictionary to represent a coding position. In our example, the HGVS position "g.32" (coordinate ``31``) is equivalent to position "c.-1". We can convert between these two as follows. @@ -155,40 +186,64 @@ position "c.-1". We can convert between these two as follows. .. code:: python >>> crossmap.coordinate_to_coding(31) - (-1, 0, -1, 0) - >>> crossmap.coding_to_coordinate((-1, 0, -1)) + {'position':1, 'offset':0, 'region':'-'} + >>> crossmap.coding_to_coordinate({'position':1, 'offset':0, 'region':'-'}) 31 The ``coordinate_to_coding()`` function accepts an optional ``degenerate`` argument. When set to ``True``, positions outside of the transcript are no -longer described using the offset notation. +longer described using the ``'u'`` or ``'d'`` notation. .. code:: python >>> crossmap.coordinate_to_coding(4) - (-11, -1, -1, -1) + {'position':1, 'offset':0, 'region':'u'} >>> crossmap.coordinate_to_coding(4, True) - (-12, 0, -1, -1) + {'position':12, 'offset':0, 'region':'-'} + +In the following table, we show a number of annotated examples. + +.. csv-table:: + :class: + :header: "Coordinate", "Position", "Offset", "Region", "HGVS" + + "0", "5", "0", `u`, `c.u5` + "4", "1", "0", `u`, `c.u1` + "5", "11", "0", `-`, `c.-11` + "24", "3", "5", `-`, `c.-3+5` + "31", "1", "0", `-`, `c.-1` + "32", "1", "0", `""`, `c.1` + "37", "3", "3", `""`, `c.3+3` + "38", "4", "-2", `""`, `c.4-2` + "43", "1", "0", `*`, `c.*1` + "61", "4", "-9", `*`, `c.*4+9` + "71", "5", "0", `*`, `c.*5` + "79", "8", "0", `d`, `c.d8` + + +Protein +------- Additionally, the functions ``coordinate_to_protein()`` and -``protein_to_coordinate()`` can be used. 
These functions use a 5-tuple to -represent a protein position. - -.. list-table:: Protein positions. - :header-rows: 1 - - * - index - - description - * - 0 - - Protein position. - * - 1 - - Codon position. - * - 2 - - Offset. - * - 3 - - Region. - * - 4 - - Upstream or downstream offset. +``protein_to_coordinate()`` can be used. These functions use a 4-key dictionary +to represent a protein position. Here is an example of ``p.1`` in HGVS. + +.. code-block:: json + + { + 'position': 1, + 'position_in_codon': 3, + 'offset': 3, + 'region': '' + } + +Where: + +- **position**: an interger representing the protein position (>0) +- **position_in_codon**: an integer indicating the nucleotide index within the codon (1, 2, or 3) +- **offset**: an integer indicating offset relative to the codon +- **region**: a string describing the region type (``''`` for standard positions) + In our example the HGVS position "g.42" (coordinate ``41``) corresponds with position "p.2". We can convert between these to as follows. @@ -196,49 +251,32 @@ position "p.2". We can convert between these to as follows. .. code:: python >>> crossmap.coordinate_to_protein(41) - (2, 2, 0, 0, 0) - >>> crossmap.protein_to_coordinate((2, 2, 0, 0)) + {'position':2, 'position_in_codon':2, 'offset':0, 'region':''} + >>> crossmap.protein_to_coordinate({'position':2, 'position_in_codon':2, offset':0, 'region':''}) 41 Note that the protein position only corresponds with the HGVS "p." notation when the offset equals ``0`` and the region equals ``1``. In the following table, we show a number of annotated examples. -.. list-table:: Protein positions examples. - :header-rows: 1 - - * - coordinate - - protein position - - description - - HGVS position - * - ``4`` - - ``(-4, 2, -1, -1, -1)`` - - Upstream position. - - invalid - * - ``31`` - - ``(-1, 3, 0, -1, 0)`` - - 5' UTR position. - - invalid - * - ``36`` - - ``(1, 3, 2, 0, 0)`` - - Intronic position. 
- - invalid - * - ``40`` - - ``(2, 1, 0, 0, 0)`` - - Second amino acid, first nucleotide. - - "p.2" - * - ``41`` - - ``(2, 2, 0, 0, 0)`` - - Second amino acid, second nucleotide. - - "p.2" - * - ``43`` - - ``(1, 1, 0, 1, 0)`` - - 3' UTR position. - - invalid - * - ``43`` - - ``(2, 2, 2, 1, 2)`` - - Downstream position. - - invalid +.. csv-table:: + :class: table-scroll + :header: "Coordinate", "Position", "position_in_codon", "Offset", "Region", "HGVS" + + "0", "4", "2", "0", `u`, `` + "4", "4", "2", "0", `u`, `` + "5", "4", "2", "0", `-`, `` + "6", "4", "3", "0", `-`, `` + "7", "3", "1", "0", `-`, `` + "31", "1", "3", "0", `-`, `` + "32", "1", "1", "0", ``, `p.1` + "33", "1", "2", "0", ``, `p.1` + "42", "2", "3", "0", ``, `p.2` + "43", "1", "1", "0", `*`, `` + "44", "1", "1", "1", `*`, `` + "79", "2", "2", "0", `d`, `` + + See section :doc:`api/crossmap` for a detailed description. @@ -279,7 +317,7 @@ The ``Locus`` class ^^^^^^^^^^^^^^^^^^^ The ``Locus`` class is used to deal with offsets with respect to a single -locus. +locus. .. code:: python @@ -288,13 +326,13 @@ locus. This class provides the functions ``to_position()`` and ``to_coordinate()`` for converting from a locus position to a coordinate and vice versa. These -functions work with a 2-tuple, see the section about `The NonCoding class`_ +functions work with a 2-key dictionary, see the section about `The NonCoding class`_ for the semantics. .. code:: python >>> locus.to_position(9) - (1, -1) + {'position':1, 'offset':-1} For loci that reside on the reverse complement strand, the optional ``inverted`` constructor parameter should be set to ``True``. @@ -317,8 +355,8 @@ The interface to this class is similar to that of the ``Locus`` class. .. code:: python >>> multilocus.to_position(22) - (10, 3) + {'position':10, 'offset':3, 'region':''} >>> multilocus.to_position(38) - (11, -2) + {'position':11, 'offset':-2, 'region':''} See section :doc:`api/multi_locus` for a detailed description. 
From fba6b95312134da10a4b5b41c0e74c9829dbe2c8 Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Fri, 20 Mar 2026 14:16:35 +0100 Subject: [PATCH 080/127] Use python code block in library.rst --- docs/library.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/library.rst b/docs/library.rst index 032b5c8..263d463 100644 --- a/docs/library.rst +++ b/docs/library.rst @@ -16,7 +16,7 @@ Genomic Position Model Genomic positions follow the HGVS genomic coordinate system. They are represented as 1-key dictionaries. Below is an example of `g.1` in HGVS. -.. code-block:: json +.. code-block:: python {'position':1} From ca45f679b380a4b658d1896532eddd93a766bc7e Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Fri, 20 Mar 2026 14:17:47 +0100 Subject: [PATCH 081/127] Update library.rst --- docs/library.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/library.rst b/docs/library.rst index 263d463..5d84c9e 100644 --- a/docs/library.rst +++ b/docs/library.rst @@ -58,7 +58,7 @@ NonCoding Position Model Noncoding positions follow the HGVS ``n`` coordinate system. They are represented as 3-key dictionaries. Below is an example of ``n.14+1`` in HGVS. -.. code-block:: json +.. code-block:: python { 'position': 14, @@ -151,7 +151,7 @@ Coding Position Model Coding positions follow the HGVS ``c`` coordinate system. They are represented as 3-key dictionaries. Here is an example of ``c.*1+3``. -.. code-block:: json +.. code-block:: python { 'position': 1, @@ -228,7 +228,7 @@ Additionally, the functions ``coordinate_to_protein()`` and ``protein_to_coordinate()`` can be used. These functions use a 4-key dictionary to represent a protein position. Here is an example of ``p.1`` in HGVS. -.. code-block:: json +.. 
code-block:: python { 'position': 1, From e4e1642cd379323feba882e4ac75ecfdb7ed80de Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Fri, 20 Mar 2026 14:32:24 +0100 Subject: [PATCH 082/127] Update library.rst --- docs/library.rst | 244 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 200 insertions(+), 44 deletions(-) diff --git a/docs/library.rst b/docs/library.rst index 5d84c9e..1f5ac9a 100644 --- a/docs/library.rst +++ b/docs/library.rst @@ -124,18 +124,56 @@ parameter should be set to ``True``. In our example, HGVS position "g.36" 35 In the following table, we show a number of annotated examples. -.. csv-table:: - :class: table-scroll - :header: "Coordinate", "Position", "Offset", "Region", "HGVS" - - "0", "5", "0", `u`, `n.u5` - "4", "1", "0", `u`, `n.u1` - "5", "1", "0", `""`, `n.1` - "24", "9", "5", `""`, `n.9+5` - "25", "10", "-5", `""`, `n.10-5` - "71", "22", "0", `""`, `n.22` - "72", "1", "0", `d`, `n.d1` - "79", "8", "0", `d`, `n.d8` + +.. _table_noncoding: +.. list-table:: Coordinates to Noncoding Positions mapping. + :header-rows: 1 + + * - Coordinate + - Position + - Offset + - Region + - HGVS + * - 0 + - 5 + - 0 + - ``u`` + - ``n.u5`` + * - 4 + - 1 + - 0 + - ``u`` + - ``n.u1`` + * - 5 + - 1 + - 0 + - `` + - ``n.1`` + * - 24 + - 9 + - 5 + - ```` + - ``n.9+5`` + * - 25 + - 10 + - -5 + - ```` + - ``n.10-5`` + * - 71 + - 22 + - 0 + - ```` + - ``n.22`` + * - 72 + - 1 + - 0 + - ``d`` + - ``n.d1`` + * - 79 + - 8 + - 0 + - ``d`` + - ``n.d8`` See section :doc:`api/crossmap` for a detailed description. @@ -203,22 +241,75 @@ longer described using the ``'u'`` or ``'d'`` notation. In the following table, we show a number of annotated examples. -.. 
csv-table:: - :class: - :header: "Coordinate", "Position", "Offset", "Region", "HGVS" - - "0", "5", "0", `u`, `c.u5` - "4", "1", "0", `u`, `c.u1` - "5", "11", "0", `-`, `c.-11` - "24", "3", "5", `-`, `c.-3+5` - "31", "1", "0", `-`, `c.-1` - "32", "1", "0", `""`, `c.1` - "37", "3", "3", `""`, `c.3+3` - "38", "4", "-2", `""`, `c.4-2` - "43", "1", "0", `*`, `c.*1` - "61", "4", "-9", `*`, `c.*4+9` - "71", "5", "0", `*`, `c.*5` - "79", "8", "0", `d`, `c.d8` +.. _table_coding: +.. list-table:: Coordinates to Coding Positions mapping + :header-rows: 1 + + * - Coordinate + - Position + - Offset + - Region + - HGVS + * - 0 + - 5 + - 0 + - ``u`` + - ``c.u5`` + * - 4 + - 1 + - 0 + - ``u`` + - ``c.u1`` + * - 5 + - 11 + - 0 + - ``-`` + - ``c.-11`` + * - 24 + - 3 + - 5 + - ``-`` + - ``c.-3+5`` + * - 31 + - 1 + - 0 + - ``-`` + - ``c.-1`` + * - 32 + - 1 + - 0 + - `` + - ``c.1`` + * - 37 + - 3 + - 3 + - `` + - ``c.3+3`` + * - 38 + - 4 + - -2 + - `` + - ``c.4-2`` + * - 43 + - 1 + - 0 + - ``*`` + - ``c.*1`` + * - 61 + - 4 + - -9 + - ``*`` + - ``c.*4+9`` + * - 71 + - 5 + - 0 + - ``*`` + - ``c.*5`` + * - 79 + - 8 + - 0 + - ``d`` + - ``c.d8`` Protein @@ -259,22 +350,87 @@ Note that the protein position only corresponds with the HGVS "p." notation when the offset equals ``0`` and the region equals ``1``. In the following table, we show a number of annotated examples. -.. csv-table:: - :class: table-scroll - :header: "Coordinate", "Position", "position_in_codon", "Offset", "Region", "HGVS" - - "0", "4", "2", "0", `u`, `` - "4", "4", "2", "0", `u`, `` - "5", "4", "2", "0", `-`, `` - "6", "4", "3", "0", `-`, `` - "7", "3", "1", "0", `-`, `` - "31", "1", "3", "0", `-`, `` - "32", "1", "1", "0", ``, `p.1` - "33", "1", "2", "0", ``, `p.1` - "42", "2", "3", "0", ``, `p.2` - "43", "1", "1", "0", `*`, `` - "44", "1", "1", "1", `*`, `` - "79", "2", "2", "0", `d`, `` +.. 
list-table::Coordinates to Protein Positions mapping + :header-rows: 1 + + * - Coordinate + - Position + - position_in_codon + - Offset + - Region + - HGVS + * - 0 + - 4 + - 2 + - 0 + - ``u`` + - `` + * - 4 + - 4 + - 2 + - 0 + - ``u`` + - `` + * - 5 + - 4 + - 2 + - 0 + - ``-`` + - `` + * - 6 + - 4 + - 3 + - 0 + - ``-`` + - `` + * - 7 + - 3 + - 1 + - 0 + - ``-`` + - `` + * - 31 + - 1 + - 3 + - 0 + - ``-`` + - `` + * - 32 + - 1 + - 1 + - 0 + - `` + - ``p.1`` + * - 33 + - 1 + - 2 + - 0 + - `` + - ``p.1`` + * - 42 + - 2 + - 3 + - 0 + - `` + - ``p.2`` + * - 43 + - 1 + - 1 + - 0 + - ``*`` + - `` + * - 44 + - 1 + - 1 + - 1 + - ``*`` + - `` + * - 79 + - 2 + - 2 + - 0 + - ``d`` + - `` From b630851a155488efbd369bf7b0ec942b9007404d Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Fri, 20 Mar 2026 14:38:56 +0100 Subject: [PATCH 083/127] Update library.rst --- docs/library.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/library.rst b/docs/library.rst index 1f5ac9a..6d103a9 100644 --- a/docs/library.rst +++ b/docs/library.rst @@ -96,7 +96,7 @@ position "n.14+1". We can convert between these two as follows. >>> crossmap.noncoding_to_coordinate({'position':14, 'offset':1, 'region':''}) {'position':14, 'offset':1, 'region':''} -When the coordinate is upstream or downstream of the transcript, we use ``'u`` to +When the coordinate is upstream or downstream of the transcript, we use ``'u'`` to present upstream and ``'d'`` to present downstream. .. code:: python @@ -152,17 +152,17 @@ In the following table, we show a number of annotated examples. 
* - 24 - 9 - 5 - - ```` + - `` - ``n.9+5`` * - 25 - 10 - -5 - - ```` + - `` - ``n.10-5`` * - 71 - 22 - 0 - - ```` + - `` - ``n.22`` * - 72 - 1 From a0514c8ad17b2de550272be1e9bdd4e70cb8e56b Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Fri, 20 Mar 2026 14:44:22 +0100 Subject: [PATCH 084/127] Update library.rst --- docs/library.rst | 132 +++++++++++++++++++++++------------------------ 1 file changed, 66 insertions(+), 66 deletions(-) diff --git a/docs/library.rst b/docs/library.rst index 6d103a9..4e84d7e 100644 --- a/docs/library.rst +++ b/docs/library.rst @@ -137,43 +137,43 @@ In the following table, we show a number of annotated examples. * - 0 - 5 - 0 - - ``u`` - - ``n.u5`` + - u`` + - n.u5`` * - 4 - 1 - 0 - - ``u`` - - ``n.u1`` + - u`` + - n.u1`` * - 5 - 1 - 0 - - `` - - ``n.1`` + - + - n.1`` * - 24 - 9 - 5 - - `` - - ``n.9+5`` + - + - n.9+5`` * - 25 - 10 - -5 - - `` - - ``n.10-5`` + - + - n.10-5`` * - 71 - 22 - 0 - - `` - - ``n.22`` + - + - n.22`` * - 72 - 1 - 0 - - ``d`` - - ``n.d1`` + - d`` + - n.d1`` * - 79 - 8 - 0 - - ``d`` - - ``n.d8`` + - d`` + - n.d8`` See section :doc:`api/crossmap` for a detailed description. @@ -253,63 +253,63 @@ In the following table, we show a number of annotated examples. * - 0 - 5 - 0 - - ``u`` - - ``c.u5`` + - u`` + - c.u5`` * - 4 - 1 - 0 - - ``u`` - - ``c.u1`` + - u`` + - c.u1`` * - 5 - 11 - 0 - - ``-`` - - ``c.-11`` + - -`` + - c.-11`` * - 24 - 3 - 5 - - ``-`` - - ``c.-3+5`` + - -`` + - c.-3+5`` * - 31 - 1 - 0 - - ``-`` - - ``c.-1`` + - -`` + - c.-1`` * - 32 - 1 - 0 - - `` - - ``c.1`` + - + - c.1`` * - 37 - 3 - 3 - - `` - - ``c.3+3`` + - + - c.3+3`` * - 38 - 4 - -2 - - `` - - ``c.4-2`` + - + - c.4-2`` * - 43 - 1 - 0 - - ``*`` - - ``c.*1`` + - *`` + - c.*1`` * - 61 - 4 - -9 - - ``*`` - - ``c.*4+9`` + - *`` + - c.*4+9`` * - 71 - 5 - 0 - - ``*`` - - ``c.*5`` + - *`` + - c.*5`` * - 79 - 8 - 0 - - ``d`` - - ``c.d8`` + - d`` + - c.d8`` Protein @@ -343,14 +343,14 @@ position "p.2". 
We can convert between these to as follows. >>> crossmap.coordinate_to_protein(41) {'position':2, 'position_in_codon':2, 'offset':0, 'region':''} - >>> crossmap.protein_to_coordinate({'position':2, 'position_in_codon':2, offset':0, 'region':''}) + >>> crossmap.protein_to_coordinate({'position':2, 'position_in_codon':2, 'offset':0, 'region':''}) 41 Note that the protein position only corresponds with the HGVS "p." notation when the offset equals ``0`` and the region equals ``1``. In the following table, we show a number of annotated examples. -.. list-table::Coordinates to Protein Positions mapping +.. list-table:: Coordinates to Protein Positions mapping :header-rows: 1 * - Coordinate @@ -363,74 +363,74 @@ table, we show a number of annotated examples. - 4 - 2 - 0 - - ``u`` - - `` + - u`` + - * - 4 - 4 - 2 - 0 - - ``u`` - - `` + - u`` + - * - 5 - 4 - 2 - 0 - - ``-`` - - `` + - -`` + - * - 6 - 4 - 3 - 0 - - ``-`` - - `` + - -`` + - * - 7 - 3 - 1 - 0 - - ``-`` - - `` + - -`` + - * - 31 - 1 - 3 - 0 - - ``-`` - - `` + - -`` + - * - 32 - 1 - 1 - 0 - - `` - - ``p.1`` + - + - p.1`` * - 33 - 1 - 2 - 0 - - `` - - ``p.1`` + - + - p.1`` * - 42 - 2 - 3 - 0 - - `` - - ``p.2`` + - + - p.2`` * - 43 - 1 - 1 - 0 - - ``*`` - - `` + - *`` + - * - 44 - 1 - 1 - 1 - - ``*`` - - `` + - *`` + - * - 79 - 2 - 2 - 0 - - ``d`` - - `` + - d`` + - From 21ce8dd377781aa13fad3edb238c9f1b853d6f28 Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Fri, 20 Mar 2026 14:50:25 +0100 Subject: [PATCH 085/127] Update library.rst --- docs/library.rst | 84 ++++++++++++++++++++++++------------------------ 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/docs/library.rst b/docs/library.rst index 4e84d7e..4884f84 100644 --- a/docs/library.rst +++ b/docs/library.rst @@ -137,23 +137,23 @@ In the following table, we show a number of annotated examples. 
* - 0 - 5 - 0 - - u`` - - n.u5`` + - ``u`` + - ``n.u5`` * - 4 - 1 - 0 - - u`` - - n.u1`` + - ``u`` + - ``n.u1`` * - 5 - 1 - 0 - - - n.1`` + - ``n.1`` * - 24 - 9 - 5 - - - n.9+5`` + - ``n.9+5`` * - 25 - 10 - -5 @@ -163,17 +163,17 @@ In the following table, we show a number of annotated examples. - 22 - 0 - - - n.22`` + - ``n.22`` * - 72 - 1 - 0 - - d`` - - n.d1`` + - ``d`` + - ``n.d1`` * - 79 - 8 - 0 - - d`` - - n.d8`` + - ``d`` + - ``n.d8`` See section :doc:`api/crossmap` for a detailed description. @@ -253,43 +253,43 @@ In the following table, we show a number of annotated examples. * - 0 - 5 - 0 - - u`` - - c.u5`` + - ``u`` + - ``c.u5`` * - 4 - 1 - 0 - - u`` - - c.u1`` + - ``u`` + - ``c.u1`` * - 5 - 11 - 0 - - -`` - - c.-11`` + - ``-`` + - ``c.-11`` * - 24 - 3 - 5 - - -`` - - c.-3+5`` + - ``-`` + - ``c.-3+5`` * - 31 - 1 - 0 - - -`` - - c.-1`` + - ``-`` + - ``c.-1`` * - 32 - 1 - 0 - - - c.1`` + - ``c.1`` * - 37 - 3 - 3 - - - c.3+3`` + - ``c.3+3`` * - 38 - 4 - -2 - - - c.4-2`` + - ``c.4-2`` * - 43 - 1 - 0 @@ -298,18 +298,18 @@ In the following table, we show a number of annotated examples. * - 61 - 4 - -9 - - *`` - - c.*4+9`` + - ``*`` + - ``c.*4+9`` * - 71 - 5 - 0 - - *`` - - c.*5`` + - ``*`` + - ``c.*5`` * - 79 - 8 - 0 - - d`` - - c.d8`` + - ``d`` + - ``c.d8`` Protein @@ -363,73 +363,73 @@ table, we show a number of annotated examples. 
- 4 - 2 - 0 - - u`` + - ``u`` - * - 4 - 4 - 2 - 0 - - u`` + - ``u`` - * - 5 - 4 - 2 - 0 - - -`` + - ``-`` - * - 6 - 4 - 3 - 0 - - -`` + - ``-`` - * - 7 - 3 - 1 - 0 - - -`` + - ``-`` - * - 31 - 1 - 3 - 0 - - -`` + - ``-`` - * - 32 - 1 - 1 - 0 - - - p.1`` + - ``p.1`` * - 33 - 1 - 2 - 0 - - - p.1`` + - ``p.1`` * - 42 - 2 - 3 - 0 - - - p.2`` + - ``p.2`` * - 43 - 1 - 1 - 0 - - *`` + - ``*`` - * - 44 - 1 - 1 - 1 - - *`` + - ``*`` - * - 79 - 2 - 2 - 0 - - d`` + - ``d`` - From 6fc7709b76452125225decc66f463879d79d7b0b Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Fri, 20 Mar 2026 14:59:11 +0100 Subject: [PATCH 086/127] Update library.rst --- docs/library.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/library.rst b/docs/library.rst index 4884f84..ab46719 100644 --- a/docs/library.rst +++ b/docs/library.rst @@ -158,7 +158,7 @@ In the following table, we show a number of annotated examples. - 10 - -5 - - - n.10-5`` + - ``n.10-5`` * - 71 - 22 - 0 @@ -293,8 +293,8 @@ In the following table, we show a number of annotated examples. * - 43 - 1 - 0 - - *`` - - c.*1`` + - ``*`` + - ``c.*1`` * - 61 - 4 - -9 @@ -350,6 +350,7 @@ Note that the protein position only corresponds with the HGVS "p." notation when the offset equals ``0`` and the region equals ``1``. In the following table, we show a number of annotated examples. +.. _table_protein: .. 
list-table:: Coordinates to Protein Positions mapping :header-rows: 1 From 1693bab2ede6ade8964a9e9e6c0ca2414ed44fd0 Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Fri, 20 Mar 2026 15:43:35 +0100 Subject: [PATCH 087/127] Update library.rst --- docs/library.rst | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/docs/library.rst b/docs/library.rst index ab46719..102dbb2 100644 --- a/docs/library.rst +++ b/docs/library.rst @@ -147,22 +147,22 @@ In the following table, we show a number of annotated examples. * - 5 - 1 - 0 - - + - - ``n.1`` * - 24 - 9 - 5 - - + - - ``n.9+5`` * - 25 - 10 - -5 - - + - - ``n.10-5`` * - 71 - 22 - 0 - - + - - ``n.22`` * - 72 - 1 @@ -278,17 +278,17 @@ In the following table, we show a number of annotated examples. * - 32 - 1 - 0 - - + - - ``c.1`` * - 37 - 3 - 3 - - + - - ``c.3+3`` * - 38 - 4 - -2 - - + - - ``c.4-2`` * - 43 - 1 @@ -365,74 +365,73 @@ table, we show a number of annotated examples. - 2 - 0 - ``u`` - - + - * - 4 - 4 - 2 - 0 - ``u`` - - + - * - 5 - 4 - 2 - 0 - ``-`` - - + - * - 6 - 4 - 3 - 0 - ``-`` - - + - * - 7 - 3 - 1 - 0 - ``-`` - - + - * - 31 - 1 - 3 - 0 - ``-`` - - + - * - 32 - 1 - 1 - 0 - - + - - ``p.1`` * - 33 - 1 - 2 - 0 - - + - - ``p.1`` * - 42 - 2 - 3 - 0 - - + - - ``p.2`` * - 43 - 1 - 1 - 0 - ``*`` - - + - * - 44 - 1 - 1 - 1 - ``*`` - - + - * - 79 - 2 - 2 - 0 - ``d`` - - - + - See section :doc:`api/crossmap` for a detailed description. @@ -471,7 +470,7 @@ The ``Coding`` class makes use of a number of basic classes described in this section. The ``Locus`` class -^^^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~~~~~ The ``Locus`` class is used to deal with offsets with respect to a single locus. 
From 9bb4501e04a1c5025f2fdba6e4e59cca7746be8d Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 20 Mar 2026 15:56:55 +0100 Subject: [PATCH 088/127] Add backticks --- docs/library.rst | 278 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 217 insertions(+), 61 deletions(-) diff --git a/docs/library.rst b/docs/library.rst index 032b5c8..67bbc75 100644 --- a/docs/library.rst +++ b/docs/library.rst @@ -14,9 +14,9 @@ Genomic Position Model ~~~~~~~~~~~~~~~~~~~~~~~ Genomic positions follow the HGVS genomic coordinate system. -They are represented as 1-key dictionaries. Below is an example of `g.1` in HGVS. +They are represented as 1-key dictionaries. Below is an example of ``g.1`` in HGVS. -.. code-block:: json +.. code-block:: python {'position':1} @@ -58,7 +58,7 @@ NonCoding Position Model Noncoding positions follow the HGVS ``n`` coordinate system. They are represented as 3-key dictionaries. Below is an example of ``n.14+1`` in HGVS. -.. code-block:: json +.. code-block:: python { 'position': 14, @@ -86,8 +86,8 @@ NonCoding Position Conversion Now the functions ``coordinate_to_noncoding()`` and ``noncoding_to_coordinate()`` can be used. -In our example, the HGVS position "g.36" (coordinate ``35``) is equivalent to -position "n.14+1". We can convert between these two as follows. +In our example, the HGVS position ``g.36`` (coordinate `35`) is equivalent to +position ``n.14+1``. We can convert between these two as follows. .. code:: python @@ -96,7 +96,7 @@ position "n.14+1". We can convert between these two as follows. >>> crossmap.noncoding_to_coordinate({'position':14, 'offset':1, 'region':''}) {'position':14, 'offset':1, 'region':''} -When the coordinate is upstream or downstream of the transcript, we use ``'u`` to +When the coordinate is upstream or downstream of the transcript, we use ``'u'`` to present upstream and ``'d'`` to present downstream. .. code:: python @@ -112,8 +112,8 @@ present upstream and ``'d'`` to present downstream. 
For transcripts that reside on the reverse complement strand, the ``inverted`` -parameter should be set to ``True``. In our example, HGVS position "g.36" -(coordinate ``35``) is now equivalent to position "n.9-1". +parameter should be set to ``True``. In our example, HGVS position ``g.36`` +(coordinate `35`) is now equivalent to position ``n.9-1``. .. code:: python @@ -124,18 +124,56 @@ parameter should be set to ``True``. In our example, HGVS position "g.36" 35 In the following table, we show a number of annotated examples. -.. csv-table:: - :class: table-scroll - :header: "Coordinate", "Position", "Offset", "Region", "HGVS" - - "0", "5", "0", `u`, `n.u5` - "4", "1", "0", `u`, `n.u1` - "5", "1", "0", `""`, `n.1` - "24", "9", "5", `""`, `n.9+5` - "25", "10", "-5", `""`, `n.10-5` - "71", "22", "0", `""`, `n.22` - "72", "1", "0", `d`, `n.d1` - "79", "8", "0", `d`, `n.d8` + +.. _table_noncoding: +.. list-table:: Coordinates to Noncoding Positions mapping. + :header-rows: 1 + + * - coordinate + - position + - offset + - region + - HGVS + * - 0 + - 5 + - 0 + - ``u`` + - ``n.u5`` + * - 4 + - 1 + - 0 + - ``u`` + - ``n.u1`` + * - 5 + - 1 + - 0 + - + - ``n.1`` + * - 24 + - 9 + - 5 + - + - ``n.9+5`` + * - 25 + - 10 + - -5 + - + - ``n.10-5`` + * - 71 + - 22 + - 0 + - + - ``n.22`` + * - 72 + - 1 + - 0 + - ``d`` + - ``n.d1`` + * - 79 + - 8 + - 0 + - ``d`` + - ``n.d8`` See section :doc:`api/crossmap` for a detailed description. @@ -151,7 +189,7 @@ Coding Position Model Coding positions follow the HGVS ``c`` coordinate system. They are represented as 3-key dictionaries. Here is an example of ``c.*1+3``. -.. code-block:: json +.. code-block:: python { 'position': 1, @@ -180,8 +218,8 @@ On top of the functionality provided by the ``NonCoding`` class, the functions ``coordinate_to_coding()`` and ``coding_to_coordinate()`` can be used. These functions use a 3-key dictionary to represent a coding position. 
-In our example, the HGVS position "g.32" (coordinate ``31``) is equivalent to -position "c.-1". We can convert between these two as follows. +In our example, the HGVS position ``g.32`` (coordinate `31`) is equivalent to +position ``c.-1``. We can convert between these two as follows. .. code:: python @@ -203,22 +241,75 @@ longer described using the ``'u'`` or ``'d'`` notation. In the following table, we show a number of annotated examples. -.. csv-table:: - :class: - :header: "Coordinate", "Position", "Offset", "Region", "HGVS" - - "0", "5", "0", `u`, `c.u5` - "4", "1", "0", `u`, `c.u1` - "5", "11", "0", `-`, `c.-11` - "24", "3", "5", `-`, `c.-3+5` - "31", "1", "0", `-`, `c.-1` - "32", "1", "0", `""`, `c.1` - "37", "3", "3", `""`, `c.3+3` - "38", "4", "-2", `""`, `c.4-2` - "43", "1", "0", `*`, `c.*1` - "61", "4", "-9", `*`, `c.*4+9` - "71", "5", "0", `*`, `c.*5` - "79", "8", "0", `d`, `c.d8` +.. _table_coding: +.. list-table:: Coordinates to Coding Positions mapping + :header-rows: 1 + + * - coordinate + - position + - offset + - region + - HGVS + * - 0 + - 5 + - 0 + - ``u`` + - ``c.u5`` + * - 4 + - 1 + - 0 + - ``u`` + - ``c.u1`` + * - 5 + - 11 + - 0 + - ``-`` + - ``c.-11`` + * - 24 + - 3 + - 5 + - ``-`` + - ``c.-3+5`` + * - 31 + - 1 + - 0 + - ``-`` + - ``c.-1`` + * - 32 + - 1 + - 0 + - + - ``c.1`` + * - 37 + - 3 + - 3 + - + - ``c.3+3`` + * - 38 + - 4 + - -2 + - + - ``c.4-2`` + * - 43 + - 1 + - 0 + - ``*`` + - ``c.*1`` + * - 61 + - 4 + - -9 + - ``*`` + - ``c.*4+9`` + * - 71 + - 5 + - 0 + - ``*`` + - ``c.*5`` + * - 79 + - 8 + - 0 + - ``d`` + - ``c.d8`` Protein @@ -228,7 +319,7 @@ Additionally, the functions ``coordinate_to_protein()`` and ``protein_to_coordinate()`` can be used. These functions use a 4-key dictionary to represent a protein position. Here is an example of ``p.1`` in HGVS. -.. code-block:: json +.. 
code-block:: python { 'position': 1, @@ -245,37 +336,102 @@ Where: - **region**: a string describing the region type (``''`` for standard positions) -In our example the HGVS position "g.42" (coordinate ``41``) corresponds with -position "p.2". We can convert between these to as follows. +In our example the HGVS position ``g.42`` (coordinate `41`) corresponds with +position ``p.2``. We can convert between these to as follows. .. code:: python >>> crossmap.coordinate_to_protein(41) {'position':2, 'position_in_codon':2, 'offset':0, 'region':''} - >>> crossmap.protein_to_coordinate({'position':2, 'position_in_codon':2, offset':0, 'region':''}) + >>> crossmap.protein_to_coordinate({'position':2, 'position_in_codon':2, 'offset':0, 'region':''}) 41 Note that the protein position only corresponds with the HGVS "p." notation when the offset equals ``0`` and the region equals ``1``. In the following table, we show a number of annotated examples. -.. csv-table:: - :class: table-scroll - :header: "Coordinate", "Position", "position_in_codon", "Offset", "Region", "HGVS" - - "0", "4", "2", "0", `u`, `` - "4", "4", "2", "0", `u`, `` - "5", "4", "2", "0", `-`, `` - "6", "4", "3", "0", `-`, `` - "7", "3", "1", "0", `-`, `` - "31", "1", "3", "0", `-`, `` - "32", "1", "1", "0", ``, `p.1` - "33", "1", "2", "0", ``, `p.1` - "42", "2", "3", "0", ``, `p.2` - "43", "1", "1", "0", `*`, `` - "44", "1", "1", "1", `*`, `` - "79", "2", "2", "0", `d`, `` - +.. _table_protein: +.. 
list-table:: Coordinates to Protein Positions mapping + :header-rows: 1 + + * - coordinate + - position + - position_in_codon + - offset + - region + - HGVS + * - 0 + - 4 + - 2 + - 0 + - ``u`` + - + * - 4 + - 4 + - 2 + - 0 + - ``u`` + - + * - 5 + - 4 + - 2 + - 0 + - ``-`` + - + * - 6 + - 4 + - 3 + - 0 + - ``-`` + - + * - 7 + - 3 + - 1 + - 0 + - ``-`` + - + * - 31 + - 1 + - 3 + - 0 + - ``-`` + - + * - 32 + - 1 + - 1 + - 0 + - + - ``p.1`` + * - 33 + - 1 + - 2 + - 0 + - + - ``p.1`` + * - 42 + - 2 + - 3 + - 0 + - + - ``p.2`` + * - 43 + - 1 + - 1 + - 0 + - ``*`` + - + * - 44 + - 1 + - 1 + - 1 + - ``*`` + - + * - 79 + - 2 + - 2 + - 0 + - ``d`` + - See section :doc:`api/crossmap` for a detailed description. @@ -314,7 +470,7 @@ The ``Coding`` class makes use of a number of basic classes described in this section. The ``Locus`` class -^^^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~~~~~ The ``Locus`` class is used to deal with offsets with respect to a single locus. From 691ec354aaad6e7b0ee8d63a826ead3bdf9bbe6d Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 20 Mar 2026 17:01:30 +0100 Subject: [PATCH 089/127] Cleanup --- docs/library.rst | 66 +++++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 26 deletions(-) diff --git a/docs/library.rst b/docs/library.rst index 67bbc75..a1c236e 100644 --- a/docs/library.rst +++ b/docs/library.rst @@ -8,7 +8,7 @@ The ``Genomic`` class --------------------- The ``Genomic`` class provides an interface to conversions between genomic -(``g.``, ``m``, ``n``) positions and coordinates. +(``g.``, ``m.``, ``n.``) positions and coordinates. Genomic Position Model ~~~~~~~~~~~~~~~~~~~~~~~ @@ -22,7 +22,7 @@ They are represented as 1-key dictionaries. Below is an example of ``g.1`` in HG Where: -- **position**: a positive integer(>0) +- **position**: a positive integer repersenting a base position(>0) Genomic Position Conversion ~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -55,7 +55,7 @@ systems should be done via a coordinate. 
NonCoding Position Model ~~~~~~~~~~~~~~~~~~~~~~~~ -Noncoding positions follow the HGVS ``n`` coordinate system. They are represented +Noncoding positions follow the HGVS ``n.`` coordinate system. They are represented as 3-key dictionaries. Below is an example of ``n.14+1`` in HGVS. .. code-block:: python @@ -200,9 +200,10 @@ represented as 3-key dictionaries. Here is an example of ``c.*1+3``. Where: - **position**: an interger representing a transcript position (>0) -- **offset**: an integer indicating the offset relative to the position -- **region**: a string describing the region type (`''` for standard coding positions, - `'-'` for 5' UTR, `'*'` for 3' UTR, `'u'` for upstream and ``'d'`` for downstream) +- **offset**: an integer indicating the offset relative to the position (negative for upstream, + positive for downstream) +- **region**: a string describing the region type (``''`` for standard coding positions, + ``'-'`` for 5' UTR, ``'*'`` for 3' UTR, ``'u'`` for upstream and ``'d'`` for downstream) Coding Position Conversion ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -230,7 +231,8 @@ position ``c.-1``. We can convert between these two as follows. The ``coordinate_to_coding()`` function accepts an optional ``degenerate`` argument. When set to ``True``, positions outside of the transcript are no -longer described using the ``'u'`` or ``'d'`` notation. +longer described using the ``'u'`` or ``'d'`` notation, ``'-'`` and ``'*'`` +are used instead. .. code:: python @@ -299,7 +301,7 @@ In the following table, we show a number of annotated examples. - 4 - -9 - ``*`` - - ``c.*4+9`` + - ``c.*4-9`` * - 71 - 5 - 0 @@ -365,37 +367,37 @@ table, we show a number of annotated examples. 
- 2 - 0 - ``u`` - - + - invalid * - 4 - 4 - 2 - 0 - ``u`` - - + - invalid * - 5 - 4 - 2 - 0 - ``-`` - - - * - 6 - - 4 + - invalid + * - 7 - 3 + - 1 - 0 - ``-`` - - - * - 7 + - invalid + * - 8 - 3 - 1 - - 0 + - 1 - ``-`` - - + - invalid * - 31 - 1 - 3 - 0 - ``-`` - - + - invalid * - 32 - 1 - 1 @@ -419,19 +421,19 @@ table, we show a number of annotated examples. - 1 - 0 - ``*`` - - + - invalid * - 44 - 1 - 1 - 1 - ``*`` - - + - invalid * - 79 - 2 - 2 - 0 - ``d`` - - + - invalid See section :doc:`api/crossmap` for a detailed description. @@ -474,6 +476,9 @@ The ``Locus`` class The ``Locus`` class is used to deal with offsets with respect to a single locus. +**Note:** the ``position`` values in the position dictionaries are **0-based**, +so the first base of the locus corresponds to ``{'position': 0, 'offset': 0}``. +This differs from HGVS numbering, which is **1-based**. .. code:: python @@ -488,7 +493,9 @@ for the semantics. .. code:: python >>> locus.to_position(9) - {'position':1, 'offset':-1} + {'position':0, 'offset':-1} + >>> locus.to_coordinate({'position':0, 'offset':-1}) + {'position':0, 'offset':-1} For loci that reside on the reverse complement strand, the optional ``inverted`` constructor parameter should be set to ``True``. @@ -499,20 +506,27 @@ The ``MultiLocus`` class ^^^^^^^^^^^^^^^^^^^^^^^^ The ``MultiLocus`` class is used to deal with offsets with respect to multiple -loci. +loci. Its positions is .. code:: python >>> from mutalyzer_crossmapper import MultiLocus >>> multilocus = MultiLocus([(10, 20), (40, 50)]) -The interface to this class is similar to that of the ``Locus`` class. +The interface to this class is similar to that of the ``Locus`` class. Functions +``to_position()`` and ``to_coordinate()`` work with a 3-key dictionary. + +**Note:** again, the ``position`` values in the position dictionaries are **0-based**. .. 
code:: python >>> multilocus.to_position(22) - {'position':10, 'offset':3, 'region':''} + {'position':9, 'offset':3, 'region':''} + >>> multilocus.to_coordinate({'position':9, 'offset':3, 'region':''}) + 22 >>> multilocus.to_position(38) - {'position':11, 'offset':-2, 'region':''} + {'position':10, 'offset':-2, 'region':''} + >>> multilocus.to_coordinate({'position':10, 'offset':-2, 'region':''} + 38 See section :doc:`api/multi_locus` for a detailed description. From bcf7bd21fe0a0a2a98f123db351417f9ffb48aed Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 20 Mar 2026 17:09:01 +0100 Subject: [PATCH 090/127] Cleanup, use '' in dictionary --- mutalyzer_crossmapper/crossmapper.py | 4 +- mutalyzer_crossmapper/locus.py | 16 +- mutalyzer_crossmapper/multi_locus.py | 28 +-- tests/helper.py | 19 +- tests/test_crossmapper.py | 276 +++++++++++++-------------- tests/test_locus.py | 32 ++-- tests/test_multi_locus.py | 84 ++++---- 7 files changed, 224 insertions(+), 235 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 3994271..e68bbbd 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -130,7 +130,7 @@ def _coordinate_to_coding(self, coordinate): """ noncoding_pos_m = self._noncoding.to_position(coordinate) - if noncoding_pos_m['region'] in ['u', 'd']: + if noncoding_pos_m['region'] in ('u', 'd'): return noncoding_pos_m location = noncoding_pos_m['position'] @@ -178,7 +178,7 @@ def _coding_to_coordinate(self, pos_m): position = pos_m['position'] region = pos_m['region'] - if region in ['u', 'd']: + if region in ('u', 'd'): return self._noncoding.to_coordinate(pos_m) noncoding_pos_m = {'offset': pos_m['offset'], 'region': ''} diff --git a/mutalyzer_crossmapper/locus.py b/mutalyzer_crossmapper/locus.py index 738e87c..eb23efd 100644 --- a/mutalyzer_crossmapper/locus.py +++ b/mutalyzer_crossmapper/locus.py @@ -19,16 +19,16 @@ def to_position(self, coordinate): """ if 
self._inverted: if coordinate > self.boundary[1]: - return {"position": 0, "offset": self.boundary[1] - coordinate} + return {'position': 0, 'offset': self.boundary[1] - coordinate} if coordinate < self.boundary[0]: - return {"position": self._end, "offset": self.boundary[0] - coordinate} - return {"position": self.boundary[1] - coordinate, "offset": 0} + return {'position': self._end, 'offset': self.boundary[0] - coordinate} + return {'position': self.boundary[1] - coordinate, 'offset': 0} if coordinate < self.boundary[0]: - return {"position": 0, "offset": coordinate - self.boundary[0]} + return {'position': 0, 'offset': coordinate - self.boundary[0]} if coordinate > self.boundary[1]: - return {"position": self._end, "offset": coordinate - self.boundary[1]} - return {"position": coordinate - self.boundary[0], "offset": 0} + return {'position': self._end, 'offset': coordinate - self.boundary[1]} + return {'position': coordinate - self.boundary[0], 'offset': 0} def to_coordinate(self, pos_m): """Convert a position model to a coordinate. @@ -38,5 +38,5 @@ def to_coordinate(self, pos_m): :returns int: Coordinate. 
""" if self._inverted: - return self.boundary[1] - pos_m["position"] - pos_m["offset"] - return self.boundary[0] + pos_m["position"] + pos_m["offset"] + return self.boundary[1] - pos_m['position'] - pos_m['offset'] + return self.boundary[0] + pos_m['position'] + pos_m['offset'] diff --git a/mutalyzer_crossmapper/multi_locus.py b/mutalyzer_crossmapper/multi_locus.py index 861e5cd..c76a39b 100644 --- a/mutalyzer_crossmapper/multi_locus.py +++ b/mutalyzer_crossmapper/multi_locus.py @@ -58,20 +58,20 @@ def to_position(self, coordinate:int): """ index = nearest_location(self._locations, coordinate, self._inverted) outside = self._orientation * self.outside(coordinate) - region = "u" if outside < 0 else "d" if outside > 0 else "" + region = 'u' if outside < 0 else 'd' if outside > 0 else '' location = self._loci[index].to_position(coordinate) if outside: return { - "position": abs(location["offset"]), - "offset": 0, - "region": region + 'position': abs(location['offset']), + 'offset': 0, + 'region': region } return { - "position": location["position"] + self._offsets[self._direction(index)], - "offset": location["offset"], - "region": region + 'position': location['position'] + self._offsets[self._direction(index)], + 'offset': location['offset'], + 'region': region } def to_coordinate(self, pos_m:dict): @@ -81,19 +81,19 @@ def to_coordinate(self, pos_m:dict): :returns int: Coordinate. 
""" - region = pos_m["region"] + region = pos_m['region'] - if pos_m["region"] in ("u", "d"): - is_upstream = region == "u" + if pos_m['region'] in ('u', 'd'): + is_upstream = region == 'u' if self._inverted: is_upstream = not is_upstream if is_upstream: - return self._locations[0][0] - abs(pos_m["position"]) + pos_m["offset"] - return abs(pos_m["position"]) + self._locations[-1][1] + pos_m["offset"] - 1 + return self._locations[0][0] - abs(pos_m['position']) + pos_m['offset'] + return abs(pos_m['position']) + self._locations[-1][1] + pos_m['offset'] - 1 index = min( len(self._offsets), - max(0, bisect_right(self._offsets, pos_m["position"]) - 1) + max(0, bisect_right(self._offsets, pos_m['position']) - 1) ) - locus_pos_m = {**pos_m, "position": pos_m["position"] - self._offsets[index]} + locus_pos_m = {**pos_m, 'position': pos_m['position'] - self._offsets[index]} return self._loci[self._direction(index)].to_coordinate(locus_pos_m) diff --git a/tests/helper.py b/tests/helper.py index 9e51367..b3ca042 100644 --- a/tests/helper.py +++ b/tests/helper.py @@ -2,19 +2,8 @@ def invariant(f, x, f_i, y): assert f(x) == y assert f_i(y) == x -def degenerate_equal(f, coordinate, locations): - results = [f(loc) for loc in locations] - - # First condition: first maps correctly - assert results[0] == coordinate, ( - f"\nFirst location: {locations[0]}" - f"\nExpected: {coordinate}" - f"\nGot: {results[0]}" - ) - # Second condition: all map to same coordinate - assert len(set(results)) == 1, ( - f"\nLocations: {locations}" - f"\nResults: {results}" - f"\nExpected all to map to the same coordinate" - ) \ No newline at end of file +def degenerate_equal(f, coordinate, locations): + assert f(locations[0]) == coordinate + assert len( + set(map(f, locations))) == 1 \ No newline at end of file diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index a3bc5f7..97c6262 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -14,13 +14,13 @@ def 
test_Genomic(): crossmap.coordinate_to_genomic, 0, crossmap.genomic_to_coordinate, - {"position": 1}, + {'position': 1}, ) invariant( crossmap.coordinate_to_genomic, 98, crossmap.genomic_to_coordinate, - {"position": 99}, + {'position': 99}, ) @@ -33,19 +33,19 @@ def test_NonCoding(): crossmap.coordinate_to_noncoding, 3, crossmap.noncoding_to_coordinate, - {"position": 2, "offset": 0, "region": "u"}, + {'position': 2, 'offset': 0, 'region': 'u'}, ) invariant( crossmap.coordinate_to_noncoding, 4, crossmap.noncoding_to_coordinate, - {"position": 1, "offset": 0, "region": "u"}, + {'position': 1, 'offset': 0, 'region': 'u'}, ) invariant( crossmap.coordinate_to_noncoding, 5, crossmap.noncoding_to_coordinate, - {"position": 1, "offset": 0, "region": ""}, + {'position': 1, 'offset': 0, 'region': ''}, ) # Boundary between downstream and transcript. @@ -53,13 +53,13 @@ def test_NonCoding(): crossmap.coordinate_to_noncoding, 71, crossmap.noncoding_to_coordinate, - {"position": 22, "offset": 0, "region": ""}, + {'position': 22, 'offset': 0, 'region': ''}, ) invariant( crossmap.coordinate_to_noncoding, 72, crossmap.noncoding_to_coordinate, - {"position": 1, "offset": 0, "region": "d"}, + {'position': 1, 'offset': 0, 'region': 'd'}, ) @@ -72,13 +72,13 @@ def test_NonCoding_inverted(): crossmap.coordinate_to_noncoding, 72, crossmap.noncoding_to_coordinate, - {"position": 1, "offset": 0, "region": "u"}, + {'position': 1, 'offset': 0, 'region': 'u'}, ) invariant( crossmap.coordinate_to_noncoding, 71, crossmap.noncoding_to_coordinate, - {"position": 1, "offset": 0, "region": ""}, + {'position': 1, 'offset': 0, 'region': ''}, ) # Boundary between downstream and transcript. 
@@ -86,13 +86,13 @@ def test_NonCoding_inverted(): crossmap.coordinate_to_noncoding, 5, crossmap.noncoding_to_coordinate, - {"position": 22, "offset": 0, "region": ""}, + {'position': 22, 'offset': 0, 'region': ''}, ) invariant( crossmap.coordinate_to_noncoding, 4, crossmap.noncoding_to_coordinate, - {"position": 1, "offset": 0, "region": "d"}, + {'position': 1, 'offset': 0, 'region': 'd'}, ) @@ -105,8 +105,8 @@ def test_NonCoding_degenerate(): crossmap.noncoding_to_coordinate, 4, [ - {"position": 1, "offset": -1, "region": ""}, - {"position": 1, "offset": 0, "region": "u"}, + {'position': 1, 'offset': -1, 'region': ''}, + {'position': 1, 'offset': 0, 'region': 'u'}, ], ) @@ -115,9 +115,9 @@ def test_NonCoding_degenerate(): crossmap.noncoding_to_coordinate, 72, [ - {"position": 1, "offset": 0, "region": "d"}, - {"position": 22, "offset": 1, "region": ""}, - {"position": 23, "offset": 0, "region": ""}, + {'position': 1, 'offset': 0, 'region': 'd'}, + {'position': 22, 'offset': 1, 'region': ''}, + {'position': 23, 'offset': 0, 'region': ''}, ], ) @@ -131,8 +131,8 @@ def test_NonCoding_inverted_degenerate(): crossmap.noncoding_to_coordinate, 72, [ - {"position": 1, "offset": -1, "region": ""}, - {"position": 1, "offset": 0, "region": "u"}, + {'position': 1, 'offset': -1, 'region': ''}, + {'position': 1, 'offset': 0, 'region': 'u'}, ], ) @@ -141,9 +141,9 @@ def test_NonCoding_inverted_degenerate(): crossmap.noncoding_to_coordinate, 4, [ - {"position": 1, "offset": 0, "region": "d"}, - {"position": 23, "offset": 0, "region": ""}, - {"position": 22, "offset": 1, "region": ""}, + {'position': 1, 'offset': 0, 'region': 'd'}, + {'position': 23, 'offset': 0, 'region': ''}, + {'position': 22, 'offset': 1, 'region': ''}, ], ) @@ -157,13 +157,13 @@ def test_Coding(): crossmap.coordinate_to_coding, 31, crossmap.coding_to_coordinate, - {"position": 1, "offset": 0, "region": "-"}, + {'position': 1, 'offset': 0, 'region': '-'}, ) invariant( crossmap.coordinate_to_coding, 32, 
crossmap.coding_to_coordinate, - {"position": 1, "offset": 0, "region": ""}, + {'position': 1, 'offset': 0, 'region': ''}, ) # Boundary between CDS and 3'. @@ -171,13 +171,13 @@ def test_Coding(): crossmap.coordinate_to_coding, 42, crossmap.coding_to_coordinate, - {"position": 6, "offset": 0, "region": ""}, + {'position': 6, 'offset': 0, 'region': ''}, ) invariant( crossmap.coordinate_to_coding, 43, crossmap.coding_to_coordinate, - {"position": 1, "offset": 0, "region": "*"}, + {'position': 1, 'offset': 0, 'region': '*'}, ) @@ -190,13 +190,13 @@ def test_Coding_inverted(): crossmap.coordinate_to_coding, 43, crossmap.coding_to_coordinate, - {"position": 1, "offset": 0, "region": "-"}, + {'position': 1, 'offset': 0, 'region': '-'}, ) invariant( crossmap.coordinate_to_coding, 42, crossmap.coding_to_coordinate, - {"position": 1, "offset": 0, "region": ""}, + {'position': 1, 'offset': 0, 'region': ''}, ) # Boundary between CDS and 3'. @@ -204,13 +204,13 @@ def test_Coding_inverted(): crossmap.coordinate_to_coding, 32, crossmap.coding_to_coordinate, - {"position": 6, "offset": 0, "region": ""}, + {'position': 6, 'offset': 0, 'region': ''}, ) invariant( crossmap.coordinate_to_coding, 31, crossmap.coding_to_coordinate, - {"position": 1, "offset": 0, "region": "*"}, + {'position': 1, 'offset': 0, 'region': '*'}, ) @@ -223,13 +223,13 @@ def test_Coding_regions(): crossmap.coordinate_to_coding, 25, crossmap.coding_to_coordinate, - {"position": 1, "offset": 5, "region": "-"}, + {'position': 1, 'offset': 5, 'region': '-'}, ) invariant( crossmap.coordinate_to_coding, 26, crossmap.coding_to_coordinate, - {"position": 1, "offset": -4, "region": ""}, + {'position': 1, 'offset': -4, 'region': ''}, ) # Downstream odd length intron between two regions. 
@@ -237,13 +237,13 @@ def test_Coding_regions(): crossmap.coordinate_to_coding, 44, crossmap.coding_to_coordinate, - {"position": 10, "offset": 5, "region": ""}, + {'position': 10, 'offset': 5, 'region': ''}, ) invariant( crossmap.coordinate_to_coding, 45, crossmap.coding_to_coordinate, - {"position": 1, "offset": -4, "region": "*"}, + {'position': 1, 'offset': -4, 'region': '*'}, ) @@ -256,13 +256,13 @@ def test_Coding_regions_inverted(): crossmap.coordinate_to_coding, 44, crossmap.coding_to_coordinate, - {"position": 1, "offset": 5, "region": "-"}, + {'position': 1, 'offset': 5, 'region': '-'}, ) invariant( crossmap.coordinate_to_coding, 43, crossmap.coding_to_coordinate, - {"position": 1, "offset": -4, "region": ""}, + {'position': 1, 'offset': -4, 'region': ''}, ) # Downstream odd length intron between two regions. @@ -270,13 +270,13 @@ def test_Coding_regions_inverted(): crossmap.coordinate_to_coding, 25, crossmap.coding_to_coordinate, - {"position": 10, "offset": 5, "region": ""}, + {'position': 10, 'offset': 5, 'region': ''}, ) invariant( crossmap.coordinate_to_coding, 24, crossmap.coding_to_coordinate, - {"position": 1, "offset": -4, "region": "*"}, + {'position': 1, 'offset': -4, 'region': '*'}, ) @@ -289,13 +289,13 @@ def test_Coding_no_utr5(): crossmap.coordinate_to_coding, 9, crossmap.coding_to_coordinate, - {"position": 1, "offset": 0, "region": "u"}, + {'position': 1, 'offset': 0, 'region': 'u'}, ) invariant( crossmap.coordinate_to_coding, 10, crossmap.coding_to_coordinate, - {"position": 1, "offset": 0, "region": ""}, + {'position': 1, 'offset': 0, 'region': ''}, ) @@ -308,13 +308,13 @@ def test_Coding_no_utr5_inverted(): crossmap.coordinate_to_coding, 20, crossmap.coding_to_coordinate, - {"position": 1, "offset": 0, "region": "u"}, + {'position': 1, 'offset': 0, 'region': 'u'}, ) invariant( crossmap.coordinate_to_coding, 19, crossmap.coding_to_coordinate, - {"position": 1 , "offset": 0, "region": ""}, + {'position': 1 , 'offset': 0, 'region': ''}, ) 
@@ -327,13 +327,13 @@ def test_Coding_no_utr3(): crossmap.coordinate_to_coding, 19, crossmap.coding_to_coordinate, - {"position": 5, "offset": 0, "region": ""}, + {'position': 5, 'offset': 0, 'region': ''}, ) invariant( crossmap.coordinate_to_coding, 20, crossmap.coding_to_coordinate, - {"position": 1, "offset": 0, "region": "d"}, + {'position': 1, 'offset': 0, 'region': 'd'}, ) @@ -346,13 +346,13 @@ def test_Coding_no_utr3_inverted(): crossmap.coordinate_to_coding, 10, crossmap.coding_to_coordinate, - {"position": 5, "offset": 0, "region": ""}, + {'position': 5, 'offset': 0, 'region': ''}, ) invariant( crossmap.coordinate_to_coding, 9, crossmap.coding_to_coordinate, - {"position": 1, "offset": 0, "region": "d"}, + {'position': 1, 'offset': 0, 'region': 'd'}, ) @@ -365,19 +365,19 @@ def test_Coding_small_utr5(): crossmap.coordinate_to_coding, 9, crossmap.coding_to_coordinate, - {"position": 1, "offset": 0, "region": "u"}, + {'position': 1, 'offset': 0, 'region': 'u'}, ) invariant( crossmap.coordinate_to_coding, 10, crossmap.coding_to_coordinate, - {"position": 1, "offset": 0, "region": "-"}, + {'position': 1, 'offset': 0, 'region': '-'}, ) invariant( crossmap.coordinate_to_coding, 11, crossmap.coding_to_coordinate, - {"position": 1, "offset": 0, "region": ""}, + {'position': 1, 'offset': 0, 'region': ''}, ) @@ -390,19 +390,19 @@ def test_Coding_small_utr5_inverted(): crossmap.coordinate_to_coding, 20, crossmap.coding_to_coordinate, - {"position": 1, "offset": 0, "region": "u"}, + {'position': 1, 'offset': 0, 'region': 'u'}, ) invariant( crossmap.coordinate_to_coding, 19, crossmap.coding_to_coordinate, - {"position": 1, "offset": 0, "region": "-"}, + {'position': 1, 'offset': 0, 'region': '-'}, ) invariant( crossmap.coordinate_to_coding, 18, crossmap.coding_to_coordinate, - {"position": 1, "offset": 0, "region": ""}, + {'position': 1, 'offset': 0, 'region': ''}, ) @@ -415,19 +415,19 @@ def test_Coding_small_utr3(): crossmap.coordinate_to_coding, 18, 
crossmap.coding_to_coordinate, - {"position": 4, "offset": 0, "region": ""}, + {'position': 4, 'offset': 0, 'region': ''}, ) invariant( crossmap.coordinate_to_coding, 19, crossmap.coding_to_coordinate, - {"position": 1, "offset": 0, "region": "*"}, + {'position': 1, 'offset': 0, 'region': '*'}, ) invariant( crossmap.coordinate_to_coding, 20, crossmap.coding_to_coordinate, - {"position": 1, "offset": 0, "region": "d"}, + {'position': 1, 'offset': 0, 'region': 'd'}, ) @@ -440,19 +440,19 @@ def test_Coding_small_utr3_inverted(): crossmap.coordinate_to_coding, 11, crossmap.coding_to_coordinate, - {"position": 4, "offset": 0, "region": ""}, + {'position': 4, 'offset': 0, 'region': ''}, ) invariant( crossmap.coordinate_to_coding, 10, crossmap.coding_to_coordinate, - {"position": 1, "offset": 0, "region": "*"}, + {'position': 1, 'offset': 0, 'region': '*'}, ) invariant( crossmap.coordinate_to_coding, 9, crossmap.coding_to_coordinate, - {"position": 1, "offset": 0, "region": "d"}, + {'position': 1, 'offset': 0, 'region': 'd'}, ) @@ -464,25 +464,25 @@ def test_Coding_degenerate(): crossmap.coding_to_coordinate, 9, [ - {"position": 1, "offset": 0, "region": "u"}, - {"position": 2, "offset": 0, "region": "-"}, - {"position": 1, "offset": -2, "region": ""}, - {"position": 1, "offset": -10, "region": "*"}, - {"position": 2, "offset": -11, "region": "*"}, - {"position": 3, "offset": 1, "region": "-"}, - {"position": 4, "offset": 2, "region": "-"}, + {'position': 1, 'offset': 0, 'region': 'u'}, + {'position': 2, 'offset': 0, 'region': '-'}, + {'position': 1, 'offset': -2, 'region': ''}, + {'position': 1, 'offset': -10, 'region': '*'}, + {'position': 2, 'offset': -11, 'region': '*'}, + {'position': 3, 'offset': 1, 'region': '-'}, + {'position': 4, 'offset': 2, 'region': '-'}, ], ) degenerate_equal( crossmap.coding_to_coordinate, 20, [ - {"position": 1, "offset": 0, "region": "d"}, - {"position": 2, "offset": 0, "region": "*"}, - {"position": 8, "offset": 2, "region": ""}, - 
{"position": 1, "offset": 10, "region": "-"}, - {"position": 2, "offset": 11, "region": "-"}, - {"position": 7, "offset": 3, "region": ""}, + {'position': 1, 'offset': 0, 'region': 'd'}, + {'position': 2, 'offset': 0, 'region': '*'}, + {'position': 8, 'offset': 2, 'region': ''}, + {'position': 1, 'offset': 10, 'region': '-'}, + {'position': 2, 'offset': 11, 'region': '-'}, + {'position': 7, 'offset': 3, 'region': ''}, ], ) @@ -495,24 +495,24 @@ def test_Coding_inverted_degenerate(): crossmap.coding_to_coordinate, 20, [ - {"position": 1, "offset": 0, "region": "u"}, - {"position": 2, "offset": 0, "region": "-"}, - {"position": 1, "offset": -2, "region": ""}, - {"position": 1, "offset": -10, "region": "*"}, - {"position": 1, "offset": -11, "region": "d"}, - {"position": 2, "offset": -3, "region": ""}, + {'position': 1, 'offset': 0, 'region': 'u'}, + {'position': 2, 'offset': 0, 'region': '-'}, + {'position': 1, 'offset': -2, 'region': ''}, + {'position': 1, 'offset': -10, 'region': '*'}, + {'position': 1, 'offset': -11, 'region': 'd'}, + {'position': 2, 'offset': -3, 'region': ''}, ], ) degenerate_equal( crossmap.coding_to_coordinate, 9, [ - {"position": 1, "offset": 0, "region": "d"}, - {"position": 2, "offset": 0, "region": "*"}, - {"position": 8, "offset": 2, "region": ""}, - {"position": 1, "offset": 10, "region": "-"}, - {"position": 1, "offset": 11, "region": "u"}, - {"position": 2, "offset": 12, "region": "u"}, + {'position': 1, 'offset': 0, 'region': 'd'}, + {'position': 2, 'offset': 0, 'region': '*'}, + {'position': 8, 'offset': 2, 'region': ''}, + {'position': 1, 'offset': 10, 'region': '-'}, + {'position': 1, 'offset': 11, 'region': 'u'}, + {'position': 2, 'offset': 12, 'region': 'u'}, ], ) @@ -523,14 +523,14 @@ def test_Coding_degenerate_return(): crossmap = Coding([(10, 20)], (11, 19)) assert crossmap.coordinate_to_coding(9, True) == { - "position": 2, - "offset": 0, - "region": "-", + 'position': 2, + 'offset': 0, + 'region': '-', } assert 
crossmap.coordinate_to_coding(20, True) == { - "position": 2, - "offset": 0, - "region": "*", + 'position': 2, + 'offset': 0, + 'region': '*', } @@ -540,14 +540,14 @@ def test_Coding_inverted_degenerate_return(): assert crossmap.coordinate_to_coding(20, True) == { - "position": 2, - "offset": 0, - "region": "-", + 'position': 2, + 'offset': 0, + 'region': '-', } assert crossmap.coordinate_to_coding(9, True) == { - "position": 2, - "offset": 0, - "region": "*", + 'position': 2, + 'offset': 0, + 'region': '*', } @@ -573,22 +573,22 @@ def test_Coding_no_utr_degenerate(): crossmap.coding_to_coordinate, 9, [ - {"position": 1, "offset": 0, "region": "u"}, - {"position": 1, "offset": 0, "region": "-"}, - {"position": 1, "offset": -2, "region": "*"}, - {"position": 1, "offset": -1, "region": ""}, - {"position": 1, "offset": -2, "region": "d"}, + {'position': 1, 'offset': 0, 'region': 'u'}, + {'position': 1, 'offset': 0, 'region': '-'}, + {'position': 1, 'offset': -2, 'region': '*'}, + {'position': 1, 'offset': -1, 'region': ''}, + {'position': 1, 'offset': -2, 'region': 'd'}, ], ) degenerate_equal( crossmap.coding_to_coordinate, 11, [ - {"position": 1, "offset": 0, "region": "d"}, - {"position": 1, "offset": 0, "region": "*"}, - {"position": 1, "offset": 2, "region": "-"}, - {"position": 1, "offset": 1, "region": ""}, - {"position": 1, "offset": 2, "region": "u"}, + {'position': 1, 'offset': 0, 'region': 'd'}, + {'position': 1, 'offset': 0, 'region': '*'}, + {'position': 1, 'offset': 2, 'region': '-'}, + {'position': 1, 'offset': 1, 'region': ''}, + {'position': 1, 'offset': 2, 'region': 'u'}, ], ) @@ -601,22 +601,22 @@ def test_Coding_inverted_no_utr_degenerate(): crossmap.coding_to_coordinate, 11, [ - {"position": 1, "offset": 0, "region": "u"}, - {"position": 1, "offset": 0, "region": "-"}, - {"position": 1, "offset": -2, "region": "*"}, - {"position": 1, "offset": -1, "region": ""}, - {"position": 1, "offset": -2, "region": "d"}, + {'position': 1, 'offset': 0, 
'region': 'u'}, + {'position': 1, 'offset': 0, 'region': '-'}, + {'position': 1, 'offset': -2, 'region': '*'}, + {'position': 1, 'offset': -1, 'region': ''}, + {'position': 1, 'offset': -2, 'region': 'd'}, ], ) degenerate_equal( crossmap.coding_to_coordinate, 9, [ - {"position": 1, "offset": 0, "region": "d"}, - {"position": 1, "offset": 0, "region": "*"}, - {"position": 1, "offset": 2, "region": "-"}, - {"position": 1, "offset": 1, "region": ""}, - {"position": 1, "offset": 2, "region": "u"}, + {'position': 1, 'offset': 0, 'region': 'd'}, + {'position': 1, 'offset': 0, 'region': '*'}, + {'position': 1, 'offset': 2, 'region': '-'}, + {'position': 1, 'offset': 1, 'region': ''}, + {'position': 1, 'offset': 2, 'region': 'u'}, ], ) @@ -626,24 +626,24 @@ def test_Coding_no_utr_degenerate_return(): crossmap = Coding([(10, 11)], (10, 11)) assert crossmap.coordinate_to_coding(8, True) == { - "position": 2, - "offset": 0, - "region": "-", + 'position': 2, + 'offset': 0, + 'region': '-', } assert crossmap.coordinate_to_coding(9, True) == { - "position": 1, - "offset": 0, - "region": "-", + 'position': 1, + 'offset': 0, + 'region': '-', } assert crossmap.coordinate_to_coding(11, True) == { - "position": 1, - "offset": 0, - "region": "*", + 'position': 1, + 'offset': 0, + 'region': '*', } assert crossmap.coordinate_to_coding(12, True) == { - "position": 2, - "offset": 0, - "region": "*", + 'position': 2, + 'offset': 0, + 'region': '*', } @@ -652,14 +652,14 @@ def test_Coding_inverted_no_utr_degenerate_return(): crossmap = Coding([(10, 11)], (10, 11), True) assert crossmap.coordinate_to_coding(11, True) == { - "position": 1, - "offset": 0, - "region": "-", + 'position': 1, + 'offset': 0, + 'region': '-', } assert crossmap.coordinate_to_coding(9, True) == { - "position": 1, - "offset": 0, - "region": "*", + 'position': 1, + 'offset': 0, + 'region': '*', } @@ -672,13 +672,13 @@ def test_Coding_protein(): crossmap.coordinate_to_protein, 31, crossmap.protein_to_coordinate, - 
{"position": 1, "position_in_codon": 3, "offset": 0, "region": "-"}, + {'position': 1, 'position_in_codon': 3, 'offset': 0, 'region': '-'}, ) invariant( crossmap.coordinate_to_protein, 32, crossmap.protein_to_coordinate, - {"position": 1, "position_in_codon": 1, "offset": 0, "region": ""}, + {'position': 1, 'position_in_codon': 1, 'offset': 0, 'region': ''}, ) # Intron boundary. @@ -686,13 +686,13 @@ def test_Coding_protein(): crossmap.coordinate_to_protein, 34, crossmap.protein_to_coordinate, - {"position": 1, "position_in_codon": 3, "offset": 0, "region": ""}, + {'position': 1, 'position_in_codon': 3, 'offset': 0, 'region': ''}, ) invariant( crossmap.coordinate_to_protein, 35, crossmap.protein_to_coordinate, - {"position": 1, "position_in_codon": 3, "offset": 1, "region": ""}, + {'position': 1, 'position_in_codon': 3, 'offset': 1, 'region': ''}, ) # Boundary between CDS and 3' UTR. @@ -700,11 +700,11 @@ def test_Coding_protein(): crossmap.coordinate_to_protein, 42, crossmap.protein_to_coordinate, - {"position": 2, "position_in_codon": 3, "offset": 0, "region": ""}, + {'position': 2, 'position_in_codon': 3, 'offset': 0, 'region': ''}, ) invariant( crossmap.coordinate_to_protein, 43, crossmap.protein_to_coordinate, - {"position": 1, "position_in_codon": 1, "offset": 0, "region": "*"}, + {'position': 1, 'position_in_codon': 1, 'offset': 0, 'region': '*'}, ) diff --git a/tests/test_locus.py b/tests/test_locus.py index b650c1a..535f93d 100644 --- a/tests/test_locus.py +++ b/tests/test_locus.py @@ -7,37 +7,37 @@ def test_Locus(): """Forward orientent Lovus.""" locus = Locus((30, 35)) - invariant(locus.to_position, 29, locus.to_coordinate, {"position": 0, "offset": -1}) - invariant(locus.to_position, 30, locus.to_coordinate, {"position": 0, "offset": 0}) - invariant(locus.to_position, 31, locus.to_coordinate, {"position": 1, "offset": 0}) - invariant(locus.to_position, 33, locus.to_coordinate, {"position": 3, "offset": 0}) - invariant(locus.to_position, 34, 
locus.to_coordinate, {"position": 4, "offset": 0}) - invariant(locus.to_position, 35, locus.to_coordinate, {"position": 4, "offset": 1}) + invariant(locus.to_position, 29, locus.to_coordinate, {'position': 0, 'offset': -1}) + invariant(locus.to_position, 30, locus.to_coordinate, {'position': 0, 'offset': 0}) + invariant(locus.to_position, 31, locus.to_coordinate, {'position': 1, 'offset': 0}) + invariant(locus.to_position, 33, locus.to_coordinate, {'position': 3, 'offset': 0}) + invariant(locus.to_position, 34, locus.to_coordinate, {'position': 4, 'offset': 0}) + invariant(locus.to_position, 35, locus.to_coordinate, {'position': 4, 'offset': 1}) def test_Locus_inverted(): """Reverse orientent Lovus.""" locus = Locus((30, 35), True) - invariant(locus.to_position, 35, locus.to_coordinate, {"position": 0, "offset": -1}) - invariant(locus.to_position, 34, locus.to_coordinate, {"position": 0, "offset": 0}) - invariant(locus.to_position, 33, locus.to_coordinate, {"position": 1, "offset": 0}) - invariant(locus.to_position, 31, locus.to_coordinate, {"position": 3, "offset": 0}) - invariant(locus.to_position, 30, locus.to_coordinate, {"position": 4, "offset": 0}) - invariant(locus.to_position, 29, locus.to_coordinate, {"position": 4, "offset": 1}) + invariant(locus.to_position, 35, locus.to_coordinate, {'position': 0, 'offset': -1}) + invariant(locus.to_position, 34, locus.to_coordinate, {'position': 0, 'offset': 0}) + invariant(locus.to_position, 33, locus.to_coordinate, {'position': 1, 'offset': 0}) + invariant(locus.to_position, 31, locus.to_coordinate, {'position': 3, 'offset': 0}) + invariant(locus.to_position, 30, locus.to_coordinate, {'position': 4, 'offset': 0}) + invariant(locus.to_position, 29, locus.to_coordinate, {'position': 4, 'offset': 1}) def test_Locus_degenerate(): """Degenerate positions are silently corrected.""" locus = Locus((10, 20)) - degenerate_equal(locus.to_coordinate, 9, [{"position": 0, "offset": -1}, {"position": -1, "offset": 0}]) - 
degenerate_equal(locus.to_coordinate, 20, [{"position": 9, "offset": 1}, {"position": 10, "offset": 0}]) + degenerate_equal(locus.to_coordinate, 9, [{'position': 0, 'offset': -1}, {'position': -1, 'offset': 0}]) + degenerate_equal(locus.to_coordinate, 20, [{'position': 9, 'offset': 1}, {'position': 10, 'offset': 0}]) def test_Locus_inverted_degenerate(): """Degenerate positions are silently corrected.""" locus = Locus((10, 20), True) - degenerate_equal(locus.to_coordinate, 20, [{"position": 0, "offset": -1}, {"position": -1, "offset": 0}]) - degenerate_equal(locus.to_coordinate, 9, [{"position": 9, "offset": 1}, {"position": 10, "offset": 0}]) + degenerate_equal(locus.to_coordinate, 20, [{'position': 0, 'offset': -1}, {'position': -1, 'offset': 0}]) + degenerate_equal(locus.to_coordinate, 9, [{'position': 9, 'offset': 1}, {'position': 10, 'offset': 0}]) diff --git a/tests/test_multi_locus.py b/tests/test_multi_locus.py index 3434a62..ab3b877 100644 --- a/tests/test_multi_locus.py +++ b/tests/test_multi_locus.py @@ -35,14 +35,14 @@ def test_MultiLocus(): multi_locus.to_position, 4, multi_locus.to_coordinate, - {"position": 1, "offset": 0, "region": "u"}, + {'position': 1, 'offset': 0, 'region': 'u'}, ) invariant( multi_locus.to_position, 5, multi_locus.to_coordinate, - {"position": 0, "offset": 0, "region": ""}, + {'position': 0, 'offset': 0, 'region': ''}, ) # Internal locus. 
@@ -50,37 +50,37 @@ def test_MultiLocus(): multi_locus.to_position, 29, multi_locus.to_coordinate, - {"position": 9, "offset": -1, "region": ""}, + {'position': 9, 'offset': -1, 'region': ''}, ) invariant( multi_locus.to_position, 30, multi_locus.to_coordinate, - {"position": 9, "offset": 0, "region": ""}, + {'position': 9, 'offset': 0, 'region': ''}, ) invariant( multi_locus.to_position, 31, multi_locus.to_coordinate, - {"position": 10, "offset": 0, "region": ""}, + {'position': 10, 'offset': 0, 'region': ''}, ) invariant( multi_locus.to_position, 33, multi_locus.to_coordinate, - {"position": 12, "offset": 0, "region": ""}, + {'position': 12, 'offset': 0, 'region': ''}, ) invariant( multi_locus.to_position, 34, multi_locus.to_coordinate, - {"position": 13, "offset": 0, "region": ""}, + {'position': 13, 'offset': 0, 'region': ''}, ) invariant( multi_locus.to_position, 35, multi_locus.to_coordinate, - {"position": 13, "offset": 1, "region": ""}, + {'position': 13, 'offset': 1, 'region': ''}, ) # Boundary between the last locus and downstream. @@ -88,13 +88,13 @@ def test_MultiLocus(): multi_locus.to_position, 71, multi_locus.to_coordinate, - {"position": 21, "offset": 0, "region": ""}, + {'position': 21, 'offset': 0, 'region': ''}, ) invariant( multi_locus.to_position, 72, multi_locus.to_coordinate, - {"position": 1, "offset": 0, "region": "d"}, + {'position': 1, 'offset': 0, 'region': 'd'}, ) @@ -107,13 +107,13 @@ def test_MultiLocus_inverted(): multi_locus.to_position, 72, multi_locus.to_coordinate, - {"position": 1, "offset": 0, "region": "u"}, + {'position': 1, 'offset': 0, 'region': 'u'}, ) invariant( multi_locus.to_position, 71, multi_locus.to_coordinate, - {"position": 0, "offset": 0, "region": ""}, + {'position': 0, 'offset': 0, 'region': ''}, ) # Internal locus. 
@@ -121,37 +121,37 @@ def test_MultiLocus_inverted(): multi_locus.to_position, 35, multi_locus.to_coordinate, - {"position": 8, "offset": -1, "region": ""}, + {'position': 8, 'offset': -1, 'region': ''}, ) invariant( multi_locus.to_position, 34, multi_locus.to_coordinate, - {"position": 8, "offset": 0, "region": ""}, + {'position': 8, 'offset': 0, 'region': ''}, ) invariant( multi_locus.to_position, 33, multi_locus.to_coordinate, - {"position": 9, "offset": 0, "region": ""}, + {'position': 9, 'offset': 0, 'region': ''}, ) invariant( multi_locus.to_position, 31, multi_locus.to_coordinate, - {"position": 11, "offset": 0, "region": ""}, + {'position': 11, 'offset': 0, 'region': ''}, ) invariant( multi_locus.to_position, 30, multi_locus.to_coordinate, - {"position": 12, "offset": 0, "region": ""}, + {'position': 12, 'offset': 0, 'region': ''}, ) invariant( multi_locus.to_position, 29, multi_locus.to_coordinate, - {"position": 12, "offset": 1, "region": ""}, + {'position': 12, 'offset': 1, 'region': ''}, ) # Boundary between the last locus and downstream. 
@@ -159,13 +159,13 @@ def test_MultiLocus_inverted(): multi_locus.to_position, 5, multi_locus.to_coordinate, - {"position": 21, "offset": 0, "region": ""}, + {'position': 21, 'offset': 0, 'region': ''}, ) invariant( multi_locus.to_position, 4, multi_locus.to_coordinate, - {"position": 1, "offset": 0, "region": "d"}, + {'position': 1, 'offset': 0, 'region': 'd'}, ) @@ -177,13 +177,13 @@ def test_MultiLocus_adjacent_loci(): multi_locus.to_position, 2, multi_locus.to_coordinate, - {"position": 1, "offset": 0, "region": ""}, + {'position': 1, 'offset': 0, 'region': ''}, ) invariant( multi_locus.to_position, 3, multi_locus.to_coordinate, - {"position": 2, "offset": 0, "region": ""}, + {'position': 2, 'offset': 0, 'region': ''}, ) @@ -195,13 +195,13 @@ def test_MultiLocus_adjacent_loci_inverted(): multi_locus.to_position, 3, multi_locus.to_coordinate, - {"position": 1, "offset": 0, "region": ""}, + {'position': 1, 'offset': 0, 'region': ''}, ) invariant( multi_locus.to_position, 2, multi_locus.to_coordinate, - {"position": 2, "offset": 0, "region": ""}, + {'position': 2, 'offset': 0, 'region': ''}, ) @@ -213,13 +213,13 @@ def test_MultiLocus_offsets_odd(): multi_locus.to_position, 4, multi_locus.to_coordinate, - {"position": 1, "offset": 2, "region": ""}, + {'position': 1, 'offset': 2, 'region': ''}, ) invariant( multi_locus.to_position, 5, multi_locus.to_coordinate, - {"position": 2, "offset": -1, "region": ""}, + {'position': 2, 'offset': -1, 'region': ''}, ) @@ -231,13 +231,13 @@ def test_MultiLocus_offsets_odd_inverted(): multi_locus.to_position, 4, multi_locus.to_coordinate, - {"position": 1, "offset": 2, "region": ""}, + {'position': 1, 'offset': 2, 'region': ''}, ) invariant( multi_locus.to_position, 3, multi_locus.to_coordinate, - {"position": 2, "offset": -1, "region": ""}, + {'position': 2, 'offset': -1, 'region': ''}, ) @@ -249,13 +249,13 @@ def test_MultiLocus_offsets_even(): multi_locus.to_position, 4, multi_locus.to_coordinate, - {"position": 1, "offset": 
2, "region": ""}, + {'position': 1, 'offset': 2, 'region': ''}, ) invariant( multi_locus.to_position, 5, multi_locus.to_coordinate, - {"position": 2, "offset": -2, "region": ""}, + {'position': 2, 'offset': -2, 'region': ''}, ) @@ -267,13 +267,13 @@ def test_MultiLocus_offsets_even_inverted(): multi_locus.to_position, 5, multi_locus.to_coordinate, - {"position": 1, "offset": 2, "region": ""}, + {'position': 1, 'offset': 2, 'region': ''}, ) invariant( multi_locus.to_position, 4, multi_locus.to_coordinate, - {"position": 2, "offset": -2, "region": ""}, + {'position': 2, 'offset': -2, 'region': ''}, ) @@ -285,9 +285,9 @@ def test_MultiLocus_degenerate(): multi_locus.to_coordinate, 4, [ - {"position": 0, "offset": -1, "region": "u"}, - {"position": 1, "offset": 0, "region": "u"}, - {"position": -1, "offset": 0, "region": "u"}, + {'position': 0, 'offset': -1, 'region': 'u'}, + {'position': 1, 'offset': 0, 'region': 'u'}, + {'position': -1, 'offset': 0, 'region': 'u'}, ], ) @@ -295,8 +295,8 @@ def test_MultiLocus_degenerate(): multi_locus.to_coordinate, 72, [ - {"position": 0, "offset": 1, "region": "d"}, - {"position": 1, "offset": 0, "region": "d"}, + {'position': 0, 'offset': 1, 'region': 'd'}, + {'position': 1, 'offset': 0, 'region': 'd'}, ], ) @@ -309,9 +309,9 @@ def test_MultiLocus_inverted_degenerate(): multi_locus.to_coordinate, 72, [ - {"position": 0, "offset": 1, "region": "u"}, - {"position": -1, "offset": 0, "region": "u"}, - {"position": 1, "offset": 0, "region": "u"}, + {'position': 0, 'offset': 1, 'region': 'u'}, + {'position': -1, 'offset': 0, 'region': 'u'}, + {'position': 1, 'offset': 0, 'region': 'u'}, ], ) @@ -319,7 +319,7 @@ def test_MultiLocus_inverted_degenerate(): multi_locus.to_coordinate, 4, [ - {"position": 0, "offset": -1, "region": "d"}, - {"position": 1, "offset": 0, "region": "d"}, + {'position': 0, 'offset': -1, 'region': 'd'}, + {'position': 1, 'offset': 0, 'region': 'd'}, ], ) From 2f167e0d7abebbfe90a4e784f11b89af3202df08 Mon Sep 17 
00:00:00 2001 From: Xiaoyun Liu Date: Fri, 20 Mar 2026 17:29:27 +0100 Subject: [PATCH 091/127] Cleanup, use typings --- mutalyzer_crossmapper/crossmapper.py | 28 ++++++++++++++-------------- mutalyzer_crossmapper/location.py | 4 ++-- mutalyzer_crossmapper/locus.py | 6 +++--- mutalyzer_crossmapper/multi_locus.py | 12 ++++++------ 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index e68bbbd..42ab6d5 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -3,7 +3,7 @@ class Genomic(object): """Genomic crossmap object.""" - def coordinate_to_genomic(self, coordinate): + def coordinate_to_genomic(self, coordinate: int) -> dict: """Convert a coordinate to a genomic position (g./m./o.). :arg int coordinate: Coordinate. @@ -12,7 +12,7 @@ def coordinate_to_genomic(self, coordinate): """ return {'position': coordinate + 1} - def genomic_to_coordinate(self, pos_m): + def genomic_to_coordinate(self, pos_m: dict) -> int: """Convert a genomic position (g./m./o.) to a coordinate. :arg dict pos_m: Genomic position model. @@ -24,7 +24,7 @@ def genomic_to_coordinate(self, pos_m): class NonCoding(Genomic): """NonCoding crossmap object.""" - def __init__(self, locations, inverted=False): + def __init__(self, locations: list[tuple[int, int]], inverted: bool = False) -> None: """ :arg list locations: List of locus locations. :arg bool inverted: Orientation. @@ -33,7 +33,7 @@ def __init__(self, locations, inverted=False): self._noncoding = MultiLocus(locations, inverted) - def coordinate_to_noncoding(self, coordinate): + def coordinate_to_noncoding(self, coordinate: int) -> dict: """Convert a coordinate to a noncoding position (n./r.). :arg int coordinate: Coordinate. 
@@ -45,7 +45,7 @@ def coordinate_to_noncoding(self, coordinate): pos_m['position'] = pos_m['position'] + 1 return pos_m - def noncoding_to_coordinate(self, pos_m): + def noncoding_to_coordinate(self, pos_m: dict) -> int: """Convert a noncoding position (n./r.) to a coordinate. :arg dict pos_m: Noncoding position model. @@ -60,7 +60,7 @@ def noncoding_to_coordinate(self, pos_m): class Coding(NonCoding): """Coding crossmap object.""" - def __init__(self, locations, cds, inverted=False): + def __init__(self, locations: list[tuple[int,int]], cds: tuple[int,int], inverted : bool=False) -> None: """ :arg list locations: List of locus locations. :arg tuple cds: Locus location. @@ -80,7 +80,7 @@ def __init__(self, locations, cds, inverted=False): self._coding = (b0['position'] + b0['offset'], b1['position'] + b1['offset'] +1) self._exons = (e0['position'], e1['position']) - def _degenerate_position(self, pos_m): + def _degenerate_position(self, pos_m: dict) -> dict: """Degenerate a coding position model (c./r.). :arg dict pos_m: Coding position model. @@ -106,7 +106,7 @@ def _degenerate_position(self, pos_m): degenerated_pos_m['region'] = '*' return degenerated_pos_m - def _normalize_position(self, pos_m): + def _normalize_position(self, pos_m: dict) -> dict: """Normalize a coding position model (c./r.). :arg dict pos_m: Coding position model. @@ -121,7 +121,7 @@ def _normalize_position(self, pos_m): coordinate = coordinate + pos_m['offset'] return self.coordinate_to_coding(coordinate) - def _coordinate_to_coding(self, coordinate): + def _coordinate_to_coding(self, coordinate: int) -> dict: """Convert a coordinate to a coding position (c./r.). :arg int coordinate: Coordinate. @@ -153,7 +153,7 @@ def _coordinate_to_coding(self, coordinate): 'region': '' } - def coordinate_to_coding(self, coordinate, degenerate=False): + def coordinate_to_coding(self, coordinate: int, degenerate: bool=False) -> dict: """Convert a coordinate to a coding position (c./r.).
:arg int coordinate: Coordinate. @@ -168,7 +168,7 @@ def coordinate_to_coding(self, coordinate, degenerate=False): return pos_m - def _coding_to_coordinate(self, pos_m): + def _coding_to_coordinate(self, pos_m: dict) -> int: """Convert a coding position (c./r.) to a coordinate. :arg dict pos_m: Coding position model (c./r.). @@ -191,7 +191,7 @@ def _coding_to_coordinate(self, pos_m): return self._noncoding.to_coordinate(noncoding_pos_m) - def coding_to_coordinate(self, pos_m): + def coding_to_coordinate(self, pos_m: dict) -> int: """Convert a coding position (c./r.) to a coordinate. :arg dict pos_m: Coding position model (c./r.). @@ -202,7 +202,7 @@ def coding_to_coordinate(self, pos_m): return self._coding_to_coordinate(normalized_pos_m) - def coordinate_to_protein(self, coordinate): + def coordinate_to_protein(self, coordinate: int) -> dict: """Convert a coordinate to a protein position (p.). :arg int coordinate: Coordinate. @@ -227,7 +227,7 @@ def coordinate_to_protein(self, coordinate): 'position_in_codon': (position + 2) % 3 + 1, **{k: v for k, v in pos.items() if k != 'position'}} - def protein_to_coordinate(self, pos_m): + def protein_to_coordinate(self, pos_m: dict) -> int: """Convert a protein position (p.) to a coordinate. :arg dict position: Protein position model(p.). diff --git a/mutalyzer_crossmapper/location.py b/mutalyzer_crossmapper/location.py index e580672..1bf7a06 100644 --- a/mutalyzer_crossmapper/location.py +++ b/mutalyzer_crossmapper/location.py @@ -1,4 +1,4 @@ -def _nearest_boundary(lb, rb, c, p): +def _nearest_boundary(lb: int, rb: int, c: int, p: int) -> int: """Find the boundary nearest to `c`. In case of a draw, the parameter `p` decides which one is chosen. @@ -19,7 +19,7 @@ def _nearest_boundary(lb, rb, c, p): return p -def nearest_location(ls, c, p=0): +def nearest_location(ls: list[tuple[int,int]], c: int, p: int = 0) -> int: """Find the location nearest to `c`. In case of a draw, the parameter `p` decides which index is chosen. 
diff --git a/mutalyzer_crossmapper/locus.py b/mutalyzer_crossmapper/locus.py index eb23efd..15b15a0 100644 --- a/mutalyzer_crossmapper/locus.py +++ b/mutalyzer_crossmapper/locus.py @@ -1,6 +1,6 @@ class Locus(object): """Locus object.""" - def __init__(self, location, inverted=False): + def __init__(self, location: tuple[int, int], inverted=False) -> None: """ :arg tuple location: Locus location. :arg bool inverted: Orientation. @@ -10,7 +10,7 @@ def __init__(self, location, inverted=False): self.boundary = location[0], location[1] - 1 self._end = self.boundary[1] - self.boundary[0] - def to_position(self, coordinate): + def to_position(self, coordinate: int) -> dict: """Convert a coordinate to a proper position model. :arg int coordinate: Coordinate. @@ -30,7 +30,7 @@ def to_position(self, coordinate): return {'position': self._end, 'offset': coordinate - self.boundary[1]} return {'position': coordinate - self.boundary[0], 'offset': 0} - def to_coordinate(self, pos_m): + def to_coordinate(self, pos_m: dict) -> int: """Convert a position model to a coordinate. :arg dict position: Position model with 'position' and 'offset' keys. diff --git a/mutalyzer_crossmapper/multi_locus.py b/mutalyzer_crossmapper/multi_locus.py index c76a39b..001ed2c 100644 --- a/mutalyzer_crossmapper/multi_locus.py +++ b/mutalyzer_crossmapper/multi_locus.py @@ -5,7 +5,7 @@ from .locus import Locus -def _offsets(locations, orientation): +def _offsets(locations: list[tuple[int, int]], orientation: int) -> list[int]: """For each location, calculate the length of the preceding locations. :arg list locations: List of locations. @@ -19,7 +19,7 @@ def _offsets(locations, orientation): class MultiLocus(object): """MultiLocus object.""" - def __init__(self, locations:list, inverted=False): + def __init__(self, locations: list[tuple[int, int]], inverted=False) -> None: """ :arg list locations: List of locus locations. :arg bool inverted: Orientation.
@@ -31,12 +31,12 @@ def __init__(self, locations:list, inverted=False): self._orientation = -1 if inverted else 1 self._offsets = _offsets(locations, self._orientation) - def _direction(self, index): + def _direction(self, index: int) -> int: if self._inverted: return len(self._offsets) - index - 1 return index - def outside(self, coordinate:int): + def outside(self, coordinate: int) -> int: """Calculate the offset relative to this MultiLocus. :arg int coordinate: Coordinate. @@ -49,7 +49,7 @@ def outside(self, coordinate:int): return coordinate - self._loci[-1].boundary[1] return 0 - def to_position(self, coordinate:int): + def to_position(self, coordinate: int) -> dict: """Convert a coordinate to a position. :arg int coordinate: Coordinate. @@ -74,7 +74,7 @@ def to_position(self, coordinate:int): 'region': region } - def to_coordinate(self, pos_m:dict): + def to_coordinate(self, pos_m: dict) -> int: """Convert a position model to a coordinate. :arg dict pos_m: Position model with 'position','offset' and 'region' keys. From ac0cd16d0cbfdfb49abcab494f5d52228fe36a7d Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Mon, 23 Mar 2026 10:35:49 +0100 Subject: [PATCH 092/127] Cleanup --- docs/library.rst | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/docs/library.rst b/docs/library.rst index a1c236e..83bf73a 100644 --- a/docs/library.rst +++ b/docs/library.rst @@ -22,7 +22,7 @@ They are represented as 1-key dictionaries. 
Below is an example of ``g.1`` in HG Where: -- **position**: a positive integer repersenting a base position(>0) +- **position**: an integer repersenting a base position (>0) Genomic Position Conversion ~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -32,7 +32,7 @@ Genomic Position Conversion >>> from mutalyzer_crossmapper import Genomic >>> crossmap = Genomic() -The functions ``coordinate_to_genomic()`` and ``genomic_to_coordinate`` can be +The functions ``coordinate_to_genomic()`` and ``genomic_to_coordinate()`` can be used to convert to and from genomic positions. .. code:: python @@ -181,7 +181,7 @@ The ``Coding`` class -------------------- The ``Coding`` class provides an interface to all conversions between -coding (``c.``, ``r.``) rpositioning systems and coordinates. Conversions between +coding (``c.``, ``r.``) positions and coordinates. Conversions between positioning systems should be done via a coordinate. Coding Position Model @@ -307,6 +307,11 @@ In the following table, we show a number of annotated examples. - 0 - ``*`` - ``c.*5`` + * - 72 + - 1 + - 0 + - ``d`` + - ``c.d1`` * - 79 - 8 - 0 @@ -348,8 +353,8 @@ position ``p.2``. We can convert between these to as follows. >>> crossmap.protein_to_coordinate({'position':2, 'position_in_codon':2, 'offset':0, 'region':''}) 41 -Note that the protein position only corresponds with the HGVS "p." notation -when the offset equals ``0`` and the region equals ``1``. In the following +**Note:** protein position only corresponds with the HGVS "p." notation +when the offset equals ``0`` and the region equals ``''``. In the following table, we show a number of annotated examples. .. _table_protein: @@ -380,18 +385,6 @@ table, we show a number of annotated examples. - 0 - ``-`` - invalid - * - 7 - - 3 - - 1 - - 0 - - ``-`` - - invalid - * - 8 - - 3 - - 1 - - 1 - - ``-`` - - invalid * - 31 - 1 - 3 @@ -410,6 +403,18 @@ table, we show a number of annotated examples. 
- 0 - - ``p.1`` + * - 34 + - 1 + - 3 + - 0 + - + - ``p.1`` + * - 35 + - 1 + - 3 + - 1 + - + - ``p.1`` * - 42 - 2 - 3 @@ -476,9 +481,6 @@ The ``Locus`` class The ``Locus`` class is used to deal with offsets with respect to a single locus. -**Note:** the ``position`` values in the position dictionaries are **0-based**, -so the first base of the locus corresponds to ``{'position': 0, 'offset': 0}``. -This differs from HGVS numbering, which is **1-based**. .. code:: python @@ -490,6 +492,10 @@ converting from a locus position to a coordinate and vice versa. These functions work with a 2-key dictionary, see the section about `The NonCoding class`_ for the semantics. +**Note:** the ``position`` values in the position dictionaries are **0-based**, +so the first base of the locus corresponds to ``{'position': 0, 'offset': 0}``. +This differs from HGVS numbering, which is **1-based**. + .. code:: python >>> locus.to_position(9) From 2ebf6c95df03a381b3f2cba8b5ebc82fd5266ef0 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Mon, 23 Mar 2026 11:08:40 +0100 Subject: [PATCH 093/127] Cleanup --- Makefile | 20 -------------------- conf.py | 28 ---------------------------- index.rst | 17 ----------------- make.bat | 35 ----------------------------------- 4 files changed, 100 deletions(-) delete mode 100644 Makefile delete mode 100644 conf.py delete mode 100644 index.rst delete mode 100644 make.bat diff --git a/Makefile b/Makefile deleted file mode 100644 index d4bb2cb..0000000 --- a/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = . -BUILDDIR = _build - -# Put it first so that "make" without argument is like "make help". 
-help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/conf.py b/conf.py deleted file mode 100644 index a50b31a..0000000 --- a/conf.py +++ /dev/null @@ -1,28 +0,0 @@ -# Configuration file for the Sphinx documentation builder. -# -# For the full list of built-in configuration values, see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Project information ----------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information - -project = 'crossmapper_dict' -copyright = '2026, Xiaoyun Liu' -author = 'Xiaoyun Liu' -release = '1.0.0' - -# -- General configuration --------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration - -extensions = [] - -templates_path = ['_templates'] -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] - - - -# -- Options for HTML output ------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output - -html_theme = 'alabaster' -html_static_path = ['_static'] diff --git a/index.rst b/index.rst deleted file mode 100644 index 3d88e72..0000000 --- a/index.rst +++ /dev/null @@ -1,17 +0,0 @@ -.. crossmapper_dict documentation master file, created by - sphinx-quickstart on Fri Mar 20 15:13:36 2026. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -crossmapper_dict documentation -============================== - -Add your content using ``reStructuredText`` syntax. See the -`reStructuredText `_ -documentation for details. - - -.. 
toctree:: - :maxdepth: 2 - :caption: Contents: - diff --git a/make.bat b/make.bat deleted file mode 100644 index 32bb245..0000000 --- a/make.bat +++ /dev/null @@ -1,35 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=. -set BUILDDIR=_build - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.https://www.sphinx-doc.org/ - exit /b 1 -) - -if "%1" == "" goto help - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd From f2e8efa7e49b0706111dc97970d4e190a189f62a Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Mon, 23 Mar 2026 11:13:44 +0100 Subject: [PATCH 094/127] Stop trackinhg local file --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 381420f..324acae 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ docs/_build/ mutalyzer_crossmapper.egg-info/ mutalyzer_crossmapper/__pycache__/ tests/__pycache__/ +tmp \ No newline at end of file From 4e6443c797c35760367dafb93fcaf3da48c2781e Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Mon, 23 Mar 2026 11:16:27 +0100 Subject: [PATCH 095/127] Delete tmp files from branch --- tmp/t.t | 80 ------------------------------------------ tmp/test_degenerate.py | 51 --------------------------- 2 files changed, 131 deletions(-) delete mode 100644 tmp/t.t delete mode 100644 tmp/test_degenerate.py diff --git a/tmp/t.t b/tmp/t.t deleted file mode 100644 index b473489..0000000 --- a/tmp/t.t +++ /dev/null @@ -1,80 +0,0 @@ -"0", "4", "2", "0", 
"u" -"1", "4", "2", "0", "u" -"2", "4", "2", "0", "u" -"3", "4", "2", "0", "u" -"4", "4", "2", "0", "u" -"5", "4", "2", "0", "-" -"6", "4", "3", "0", "-" -"7", "3", "1", "0", "-" -"8", "3", "1", "1", "-" -"9", "3", "1", "2", "-" -"10", "3", "1", "3", "-" -"11", "3", "2", "-3", "-" -"12", "3", "2", "-2", "-" -"13", "3", "2", "-1", "-" -"14", "3", "2", "0", "-" -"15", "3", "3", "0", "-" -"16", "2", "1", "0", "-" -"17", "2", "2", "0", "-" -"18", "2", "3", "0", "-" -"19", "1", "1", "0", "-" -"20", "1", "1", "1", "-" -"21", "1", "1", "2", "-" -"22", "1", "1", "3", "-" -"23", "1", "1", "4", "-" -"24", "1", "1", "5", "-" -"25", "1", "2", "-5", "-" -"26", "1", "2", "-4", "-" -"27", "1", "2", "-3", "-" -"28", "1", "2", "-2", "-" -"29", "1", "2", "-1", "-" -"30", "1", "2", "0", "-" -"31", "1", "3", "0", "-" -"32", "1", "1", "0", "" -"33", "1", "2", "0", "" -"34", "1", "3", "0", "" -"35", "1", "3", "1", "" -"36", "1", "3", "2", "" -"37", "1", "3", "3", "" -"38", "2", "1", "-2", "" -"39", "2", "1", "-1", "" -"40", "2", "1", "0", "" -"41", "2", "2", "0", "" -"42", "2", "3", "0", "" -"43", "1", "1", "0", "*" -"44", "1", "1", "1", "*" -"45", "1", "1", "2", "*" -"46", "1", "1", "3", "*" -"47", "1", "2", "-3", "*" -"48", "1", "2", "-2", "*" -"49", "1", "2", "-1", "*" -"50", "1", "2", "0", "*" -"51", "1", "3", "0", "*" -"52", "1", "3", "1", "*" -"53", "1", "3", "2", "*" -"54", "1", "3", "3", "*" -"55", "1", "3", "4", "*" -"56", "1", "3", "5", "*" -"57", "1", "3", "6", "*" -"58", "1", "3", "7", "*" -"59", "1", "3", "8", "*" -"60", "1", "3", "9", "*" -"61", "2", "1", "-9", "*" -"62", "2", "1", "-8", "*" -"63", "2", "1", "-7", "*" -"64", "2", "1", "-6", "*" -"65", "2", "1", "-5", "*" -"66", "2", "1", "-4", "*" -"67", "2", "1", "-3", "*" -"68", "2", "1", "-2", "*" -"69", "2", "1", "-1", "*" -"70", "2", "1", "0", "*" -"71", "2", "2", "0", "*" -"72", "2", "2", "0", "d" -"73", "2", "2", "0", "d" -"74", "2", "2", "0", "d" -"75", "2", "2", "0", "d" -"76", "2", "2", "0", "d" -"77", "2", "2", 
"0", "d" -"78", "2", "2", "0", "d" -"79", "2", "2", "0", "d" diff --git a/tmp/test_degenerate.py b/tmp/test_degenerate.py deleted file mode 100644 index c513d0e..0000000 --- a/tmp/test_degenerate.py +++ /dev/null @@ -1,51 +0,0 @@ -"""a script to check degenerate option""" -from mutalyzer_crossmapper import Coding, Genomic, NonCoding - - -def serialize(pos_m: dict): - if pos_m["offset"] > 0: - return f"{pos_m['region']}{pos_m['position']}+{pos_m['offset']}" - elif pos_m["offset"] < 0: - return f"{pos_m['region']}{pos_m['position']}{pos_m['offset']}" - else: - return f"{pos_m['region']}{pos_m['position']}" - -_exons = [(5, 8), (14, 20), (30, 35), (40, 44), (50, 52), (70, 72)] -_cds = (32, 43) - - -nc_crossmap = NonCoding(_exons, True) -c_crossmap = Coding(_exons, _cds) -test = Coding([(10, 11)], (10, 11)) - - -for i in range(0, 80): - # nc = nc_crossmap.coordinate_to_noncoding(i) - c = c_crossmap.coordinate_to_protein(i) - - # print(i, c, c_crossmap.coding_to_coordinate(c)) - # c_de = nc_crossmap.coordinate_to_coding(i, True) - # nc_de = nc_crossmap.coordinate_to_coding(i, True) - print(i, c) - # print(f'"{i}", "{c["position"]}", "{c["position_in_codon"]}", "{c["offset"]}", "{c["region"]}"') - - -# crossmap = Coding(_exons, _cds) -# for i in range(0, 80): -# print(i, crossmap.coordinate_to_coding(i), crossmap.coordinate_to_coding(i, degenerate=True)) - -# nc_crossmap = NonCoding(_exons) -# for i in range(0, 80): -# print(i, nc_crossmap.coordinate_to_noncoding(i)) - - - - - - -# degereate option -""" -With this option, it keeps counting c_pos outside the exons range -e.g., (-16, 0, -1, -5) means -c_pos=16, offset_to_c_pos=0, before_CDS, offset_to_exons_range = -5 -""" From 8deedc15cfc7d1aab64a9d47833cdf54d62ff2ff Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Mon, 23 Mar 2026 11:43:08 +0100 Subject: [PATCH 096/127] Cleanup --- mutalyzer_crossmapper/crossmapper.py | 19 ++++++++++--------- mutalyzer_crossmapper/locus.py | 2 +- mutalyzer_crossmapper/multi_locus.py | 5 
++--- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 42ab6d5..6d4f656 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -88,10 +88,11 @@ def _degenerate_position(self, pos_m: dict) -> dict: :returns dict: a generate coding position model. """ region = pos_m['region'] - position = pos_m['position'] + if region not in ('u', 'd'): + return pos_m degenerated_pos_m = {'offset': pos_m['offset']} - + position = pos_m['position'] if region == 'u': if self._inverted: degenerated_pos_m['position'] = position + self._exons[1] - self._coding[1] + 1 @@ -113,13 +114,13 @@ def _normalize_position(self, pos_m: dict) -> dict: :returns dict: a normalized coding postion model. """ - initial_pos = {**pos_m, 'offset': 0} - coordinate = self._coding_to_coordinate(initial_pos) + base_pos = {**pos_m, 'offset': 0} + base_coordinate = self._coding_to_coordinate(base_pos) if self._inverted: - coordinate = coordinate - pos_m['offset'] + base_coordinate = base_coordinate - pos_m['offset'] else: - coordinate = coordinate + pos_m['offset'] - return self.coordinate_to_coding(coordinate) + base_coordinate = base_coordinate + pos_m['offset'] + return self.coordinate_to_coding(base_coordinate) def _coordinate_to_coding(self, coordinate: int) -> dict: """Convert a coordinate to a coding position (c./r.). @@ -163,7 +164,7 @@ def coordinate_to_coding(self, coordinate: tuple[int, int], degenerate: bool=Fal """ pos_m = self._coordinate_to_coding(coordinate) - if degenerate and pos_m['region'] in ('u', 'd'): + if degenerate: pos_m = self._degenerate_position(pos_m) return pos_m @@ -209,7 +210,7 @@ def coordinate_to_protein(self, coordinate: int) -> dict: :returns dict: Protein position model(p.). 
""" - pos = self.coordinate_to_coding(coordinate) + pos = self.coordinate_to_coding(coordinate, True) if pos['region'] == 'u': pos = self.coordinate_to_coding(coordinate + pos['position']) diff --git a/mutalyzer_crossmapper/locus.py b/mutalyzer_crossmapper/locus.py index 15b15a0..fff0c66 100644 --- a/mutalyzer_crossmapper/locus.py +++ b/mutalyzer_crossmapper/locus.py @@ -10,7 +10,7 @@ def __init__(self, location: list[tuple[int, int]], inverted=False) -> None: self.boundary = location[0], location[1] - 1 self._end = self.boundary[1] - self.boundary[0] - def to_position(self, coordinate: int) -> dict: + def to_position(self, coordinate: int) -> dict[str, int]: """Convert a coordinate to a proper position model. :arg int coordinate: Coordinate. diff --git a/mutalyzer_crossmapper/multi_locus.py b/mutalyzer_crossmapper/multi_locus.py index 001ed2c..0b1ef52 100644 --- a/mutalyzer_crossmapper/multi_locus.py +++ b/mutalyzer_crossmapper/multi_locus.py @@ -19,7 +19,7 @@ def _offsets(locations: list[tuple[int, int]], orientation: int) -> list[int]: class MultiLocus(object): """MultiLocus object.""" - def __init__(self, locations: list[tuple[int, int]], inverted=False) -> None: + def __init__(self, locations: list[tuple[int, int]], inverted: bool=False) -> None: """ :arg list locations: List of locus locations. :arg bool inverted: Orientation. @@ -49,7 +49,7 @@ def outside(self, coordinate: int) -> int: return coordinate - self._loci[-1].boundary[1] return 0 - def to_position(self, coordinate: int) -> dict: + def to_position(self, coordinate: int) -> dict[str: int | str]: """Convert a coordinate to a position. :arg int coordinate: Coordinate. 
@@ -67,7 +67,6 @@ def to_position(self, coordinate: int) -> dict: 'offset': 0, 'region': region } - return { 'position': location['position'] + self._offsets[self._direction(index)], 'offset': location['offset'], From f5f7a51353337a4795b78b96c6aea9fba3a0cb3b Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Mon, 23 Mar 2026 16:19:50 +0100 Subject: [PATCH 097/127] Refactor protein conversion --- docs/library.rst | 13 ++++++++++--- mutalyzer_crossmapper/crossmapper.py | 13 ++++--------- tests/test_crossmapper.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 12 deletions(-) diff --git a/docs/library.rst b/docs/library.rst index 83bf73a..118c9e8 100644 --- a/docs/library.rst +++ b/docs/library.rst @@ -368,14 +368,14 @@ table, we show a number of annotated examples. - region - HGVS * - 0 - - 4 + - 2 - 2 - 0 - ``u`` - invalid * - 4 - - 4 - - 2 + - 1 + - 3 - 0 - ``u`` - invalid @@ -433,6 +433,13 @@ table, we show a number of annotated examples. - 1 - ``*`` - invalid + * - 72 + - 1 + - 1 + - 0 + - ``d`` + - invalid + * - 79 - 2 - 2 diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 6d4f656..4ac0ce4 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -77,7 +77,7 @@ def __init__(self, locations: list[tuple[int,int]], cds: tuple[int,int], inverte self._coding = (b1['position'] + b1['offset'], b0['position'] + b0['offset'] + 1) self._exons = (e1['position'], e0['position']) else: - self._coding = (b0['position'] + b0['offset'], b1['position'] + b1['offset'] +1) + self._coding = (b0['position'] + b0['offset'], b1['position'] + b1['offset'] + 1) self._exons = (e0['position'], e1['position']) def _degenerate_position(self, pos_m: dict) -> dict: @@ -210,15 +210,10 @@ def coordinate_to_protein(self, coordinate: int) -> dict: :returns dict: Protein position model(p.). 
""" - pos = self.coordinate_to_coding(coordinate, True) - - if pos['region'] == 'u': - pos = self.coordinate_to_coding(coordinate + pos['position']) - elif pos['region'] == 'd': - pos = self.coordinate_to_coding(coordinate - pos['position']) + pos = self.coordinate_to_coding(coordinate) position = pos['position'] - if pos['region'] == '-': + if pos['region'] in ('-', 'u'): return { 'position': abs(-position // 3), 'position_in_codon': -position % 3 + 1, @@ -235,7 +230,7 @@ def protein_to_coordinate(self, pos_m: dict) -> int: :returns int: Coordinate. """ - if pos_m['region'] == '-': + if pos_m['region'] in ('-', 'u'): return self.coding_to_coordinate( {'position': 3 * pos_m['position'] - pos_m['position_in_codon'] + 1, 'offset': pos_m['offset'], diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index 97c6262..7265507 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -667,6 +667,20 @@ def test_Coding_protein(): """Protein positions.""" crossmap = Coding(_exons, _cds) + # Boundary between upstream and 5' UTR + invariant( + crossmap.coordinate_to_protein, + 4, + crossmap.protein_to_coordinate, + {'position': 1, 'position_in_codon': 3, 'offset': 0, 'region': 'u'} + ) + invariant( + crossmap.coordinate_to_protein, + 5, + crossmap.protein_to_coordinate, + {'position': 4, 'position_in_codon': 2, 'offset': 0, 'region': '-'} + ) + # Boundary between 5' UTR and CDS invariant( crossmap.coordinate_to_protein, @@ -708,3 +722,17 @@ def test_Coding_protein(): crossmap.protein_to_coordinate, {'position': 1, 'position_in_codon': 1, 'offset': 0, 'region': '*'}, ) + + # Boundary between 3' UTR and downstream + invariant( + crossmap.coordinate_to_protein, + 71, + crossmap.protein_to_coordinate, + {'position': 2, 'position_in_codon': 2, 'offset': 0, 'region': '*'} + ) + invariant( + crossmap.coordinate_to_protein, + 72, + crossmap.protein_to_coordinate, + {'position': 1, 'position_in_codon': 1, 'offset': 0, 'region': 'd'} + ) From 
79cbcc0266ea94626c00b08b6f88cdde9e44c99d Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Mon, 23 Mar 2026 16:39:00 +0100 Subject: [PATCH 098/127] Add degenerate tests --- tests/test_multi_locus.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/test_multi_locus.py b/tests/test_multi_locus.py index ab3b877..259eafa 100644 --- a/tests/test_multi_locus.py +++ b/tests/test_multi_locus.py @@ -285,7 +285,8 @@ def test_MultiLocus_degenerate(): multi_locus.to_coordinate, 4, [ - {'position': 0, 'offset': -1, 'region': 'u'}, + {'position': 0, 'offset': -1, 'region': ''}, + {'position': -1, 'offset': 0, 'region': ''}, {'position': 1, 'offset': 0, 'region': 'u'}, {'position': -1, 'offset': 0, 'region': 'u'}, ], @@ -295,6 +296,8 @@ def test_MultiLocus_degenerate(): multi_locus.to_coordinate, 72, [ + {'position': 21, 'offset': 1, 'region': ''}, + {'position': 22, 'offset': 0, 'region': ''}, {'position': 0, 'offset': 1, 'region': 'd'}, {'position': 1, 'offset': 0, 'region': 'd'}, ], @@ -310,7 +313,8 @@ def test_MultiLocus_inverted_degenerate(): 72, [ {'position': 0, 'offset': 1, 'region': 'u'}, - {'position': -1, 'offset': 0, 'region': 'u'}, + {'position': -1, 'offset': 0, 'region': ''}, + {'position': 0, 'offset': -1, 'region': ''}, {'position': 1, 'offset': 0, 'region': 'u'}, ], ) @@ -320,6 +324,8 @@ def test_MultiLocus_inverted_degenerate(): 4, [ {'position': 0, 'offset': -1, 'region': 'd'}, + {'position': 21, 'offset': 1, 'region': ''}, + {'position': 22, 'offset': 0, 'region': ''}, {'position': 1, 'offset': 0, 'region': 'd'}, ], ) From 1459451f4f263b80713b01cbc56c5c6731154724 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Mon, 23 Mar 2026 16:58:03 +0100 Subject: [PATCH 099/127] Add backticks for region in table --- docs/library.rst | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/docs/library.rst b/docs/library.rst index 118c9e8..a069719 100644 --- a/docs/library.rst +++ 
b/docs/library.rst @@ -137,12 +137,12 @@ In the following table, we show a number of annotated examples. * - 0 - 5 - 0 - - ``u`` + - ``'u'`` - ``n.u5`` * - 4 - 1 - 0 - - ``u`` + - ``'u'`` - ``n.u1`` * - 5 - 1 @@ -167,12 +167,12 @@ In the following table, we show a number of annotated examples. * - 72 - 1 - 0 - - ``d`` + - ``'d'`` - ``n.d1`` * - 79 - 8 - 0 - - ``d`` + - ``'d'`` - ``n.d8`` See section :doc:`api/crossmap` for a detailed description. @@ -255,27 +255,27 @@ In the following table, we show a number of annotated examples. * - 0 - 5 - 0 - - ``u`` + - ``'u'`` - ``c.u5`` * - 4 - 1 - 0 - - ``u`` + - ``'u'`` - ``c.u1`` * - 5 - 11 - 0 - - ``-`` + - ``'-'`` - ``c.-11`` * - 24 - 3 - 5 - - ``-`` + - ``'-'`` - ``c.-3+5`` * - 31 - 1 - 0 - - ``-`` + - ``'-'`` - ``c.-1`` * - 32 - 1 @@ -295,27 +295,27 @@ In the following table, we show a number of annotated examples. * - 43 - 1 - 0 - - ``*`` + - ``'*'`` - ``c.*1`` * - 61 - 4 - -9 - - ``*`` + - ``'*'`` - ``c.*4-9`` * - 71 - 5 - 0 - - ``*`` + - ``'*'`` - ``c.*5`` * - 72 - 1 - 0 - - ``d`` + - ``'d'`` - ``c.d1`` * - 79 - 8 - 0 - - ``d`` + - ``'d'`` - ``c.d8`` @@ -371,25 +371,25 @@ table, we show a number of annotated examples. - 2 - 2 - 0 - - ``u`` + - ``'u'`` - invalid * - 4 - 1 - 3 - 0 - - ``u`` + - ``'u'`` - invalid * - 5 - 4 - 2 - 0 - - ``-`` + - ``'-'`` - invalid * - 31 - 1 - 3 - 0 - - ``-`` + - ``'-'`` - invalid * - 32 - 1 @@ -425,26 +425,26 @@ table, we show a number of annotated examples. 
- 1 - 1 - 0 - - ``*`` + - ``'*'`` - invalid * - 44 - 1 - 1 - 1 - - ``*`` + - ``'*'`` - invalid * - 72 - 1 - 1 - 0 - - ``d`` + - ``'d'`` - invalid * - 79 - 2 - 2 - 0 - - ``d`` + - ``'d'`` - invalid From e92c78e5276187cc798796720c56c8d09ef4c385 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Mon, 23 Mar 2026 17:38:39 +0100 Subject: [PATCH 100/127] Cleanup --- mutalyzer_crossmapper/crossmapper.py | 38 ++++++++++++++-------------- mutalyzer_crossmapper/multi_locus.py | 8 +++--- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 4ac0ce4..4630e7d 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -53,7 +53,7 @@ def noncoding_to_coordinate(self, pos_m: dict) -> int: :returns int: Coordinate. """ multilocus_pos_m = {**pos_m} - if pos_m['region'] == '': + if multilocus_pos_m['region'] == '': multilocus_pos_m['position'] = pos_m['position'] - 1 return self._noncoding.to_coordinate(multilocus_pos_m) @@ -91,21 +91,21 @@ def _degenerate_position(self, pos_m: dict) -> dict: if region not in ('u', 'd'): return pos_m - degenerated_pos_m = {'offset': pos_m['offset']} - position = pos_m['position'] + degenerate_pos_m = {'offset': pos_m['offset']} + location = pos_m['position'] if region == 'u': if self._inverted: - degenerated_pos_m['position'] = position + self._exons[1] - self._coding[1] + 1 + degenerate_pos_m['position'] = location + self._exons[1] - self._coding[1] + 1 else: - degenerated_pos_m['position'] = position + self._coding[0] - degenerated_pos_m['region'] = '-' + degenerate_pos_m['position'] = location + self._coding[0] + degenerate_pos_m['region'] = '-' if region == 'd': if self._inverted: - degenerated_pos_m['position'] = position + self._coding[0] + degenerate_pos_m['position'] = location + self._coding[0] else: - degenerated_pos_m['position'] = position + self._exons[1]- self._coding[1] + 1 - degenerated_pos_m['region'] = '*' - 
return degenerated_pos_m + degenerate_pos_m['position'] = location + self._exons[1]- self._coding[1] + 1 + degenerate_pos_m['region'] = '*' + return degenerate_pos_m def _normalize_position(self, pos_m: dict) -> dict: """Normalize a coding position model (c./r.). @@ -176,7 +176,7 @@ def _coding_to_coordinate(self, pos_m: dict) -> int: :returns int: Coordinate. """ - position = pos_m['position'] + location = pos_m['position'] region = pos_m['region'] if region in ('u', 'd'): @@ -184,11 +184,11 @@ def _coding_to_coordinate(self, pos_m: dict) -> int: noncoding_pos_m = {'offset': pos_m['offset'], 'region': ''} if region == '': - noncoding_pos_m['position'] = position + self._coding[0] - 1 + noncoding_pos_m['position'] = location + self._coding[0] - 1 elif region == '-': - noncoding_pos_m['position'] = self._coding[0] - position + noncoding_pos_m['position'] = self._coding[0] - location else: - noncoding_pos_m['position'] = self._coding[1] + position - 1 + noncoding_pos_m['position'] = self._coding[1] + location - 1 return self._noncoding.to_coordinate(noncoding_pos_m) @@ -212,15 +212,15 @@ def coordinate_to_protein(self, coordinate: int) -> dict: """ pos = self.coordinate_to_coding(coordinate) - position = pos['position'] + location = pos['position'] if pos['region'] in ('-', 'u'): return { - 'position': abs(-position // 3), - 'position_in_codon': -position % 3 + 1, + 'position': abs(-location // 3), + 'position_in_codon': -location % 3 + 1, **{k: v for k, v in pos.items() if k != 'position'}} return { - 'position': (position + 2) // 3, - 'position_in_codon': (position + 2) % 3 + 1, + 'position': (location + 2) // 3, + 'position_in_codon': (location + 2) % 3 + 1, **{k: v for k, v in pos.items() if k != 'position'}} def protein_to_coordinate(self, pos_m: dict) -> int: diff --git a/mutalyzer_crossmapper/multi_locus.py b/mutalyzer_crossmapper/multi_locus.py index 0b1ef52..2f67777 100644 --- a/mutalyzer_crossmapper/multi_locus.py +++ b/mutalyzer_crossmapper/multi_locus.py 
@@ -59,17 +59,17 @@ def to_position(self, coordinate: int) -> dict[str: int | str]: index = nearest_location(self._locations, coordinate, self._inverted) outside = self._orientation * self.outside(coordinate) region = 'u' if outside < 0 else 'd' if outside > 0 else '' - location = self._loci[index].to_position(coordinate) + locus_pos_m = self._loci[index].to_position(coordinate) if outside: return { - 'position': abs(location['offset']), + 'position': abs(locus_pos_m['offset']), 'offset': 0, 'region': region } return { - 'position': location['position'] + self._offsets[self._direction(index)], - 'offset': location['offset'], + 'position': locus_pos_m['position'] + self._offsets[self._direction(index)], + 'offset': locus_pos_m['offset'], 'region': region } From f8105676ff7f1f01c296f1887d778644c9d184ce Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Wed, 25 Mar 2026 13:54:51 +0100 Subject: [PATCH 101/127] Discard normalize position model --- mutalyzer_crossmapper/crossmapper.py | 25 +++++++------------------ mutalyzer_crossmapper/multi_locus.py | 13 +++++++------ 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 4630e7d..6ad0140 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -45,7 +45,7 @@ def coordinate_to_noncoding(self, coordinate: int) -> dict: pos_m['position'] = pos_m['position'] + 1 return pos_m - def noncoding_to_coordinate(self, pos_m: dict) -> int: + def noncoding_to_coordinate(self, pos_m: dict, degenerate: bool=True) -> int: """Convert a noncoding position (n./r.) to a coordinate. :arg dict pos_m: Noncoding position model. @@ -53,6 +53,11 @@ def noncoding_to_coordinate(self, pos_m: dict) -> int: :returns int: Coordinate. 
""" multilocus_pos_m = {**pos_m} + if degenerate: + if multilocus_pos_m["region"] == '-': + multilocus_pos_m["region"] = 'u' + elif multilocus_pos_m["region"] == '*': + multilocus_pos_m['region'] = 'd' if multilocus_pos_m['region'] == '': multilocus_pos_m['position'] = pos_m['position'] - 1 return self._noncoding.to_coordinate(multilocus_pos_m) @@ -107,21 +112,6 @@ def _degenerate_position(self, pos_m: dict) -> dict: degenerate_pos_m['region'] = '*' return degenerate_pos_m - def _normalize_position(self, pos_m: dict) -> dict: - """Normalize a coding position model (c./r.). - - :arg dict pos_m: Coding position model. - - :returns dict: a normalized coding postion model. - """ - base_pos = {**pos_m, 'offset': 0} - base_coordinate = self._coding_to_coordinate(base_pos) - if self._inverted: - base_coordinate = base_coordinate - pos_m['offset'] - else: - base_coordinate = base_coordinate + pos_m['offset'] - return self.coordinate_to_coding(base_coordinate) - def _coordinate_to_coding(self, coordinate: int) -> dict: """Convert a coordinate to a coding position (c./r.). @@ -199,9 +189,8 @@ def coding_to_coordinate(self, pos_m: dict) -> int: :returns int: Coordinate. """ - normalized_pos_m = self._normalize_position(pos_m) - return self._coding_to_coordinate(normalized_pos_m) + return self._coding_to_coordinate(pos_m) def coordinate_to_protein(self, coordinate: int) -> dict: """Convert a coordinate to a protein position (p.). 
diff --git a/mutalyzer_crossmapper/multi_locus.py b/mutalyzer_crossmapper/multi_locus.py index 2f67777..a60e6da 100644 --- a/mutalyzer_crossmapper/multi_locus.py +++ b/mutalyzer_crossmapper/multi_locus.py @@ -82,13 +82,14 @@ def to_coordinate(self, pos_m: dict) -> int: """ region = pos_m['region'] - if pos_m['region'] in ('u', 'd'): - is_upstream = region == 'u' + if region == 'u': if self._inverted: - is_upstream = not is_upstream - if is_upstream: - return self._locations[0][0] - abs(pos_m['position']) + pos_m['offset'] - return abs(pos_m['position']) + self._locations[-1][1] + pos_m['offset'] - 1 + return self._locations[-1][1] + abs(pos_m['position']) - pos_m['offset'] - 1 + return self._locations[0][0] - abs(pos_m['position']) + pos_m['offset'] + elif region == 'd': + if self._inverted: + return self._locations[0][0] - abs(pos_m['position']) - pos_m['offset'] + return self._locations[-1][1] + abs(pos_m['position']) + pos_m['offset'] - 1 index = min( len(self._offsets), From c5d773254d8081db3303a16516af28fec599723d Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Wed, 25 Mar 2026 17:26:22 +0100 Subject: [PATCH 102/127] Fix test --- tests/test_multi_locus.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_multi_locus.py b/tests/test_multi_locus.py index 259eafa..99c7e50 100644 --- a/tests/test_multi_locus.py +++ b/tests/test_multi_locus.py @@ -312,7 +312,7 @@ def test_MultiLocus_inverted_degenerate(): multi_locus.to_coordinate, 72, [ - {'position': 0, 'offset': 1, 'region': 'u'}, + {'position': 0, 'offset': -1, 'region': 'u'}, {'position': -1, 'offset': 0, 'region': ''}, {'position': 0, 'offset': -1, 'region': ''}, {'position': 1, 'offset': 0, 'region': 'u'}, @@ -323,9 +323,9 @@ def test_MultiLocus_inverted_degenerate(): multi_locus.to_coordinate, 4, [ - {'position': 0, 'offset': -1, 'region': 'd'}, {'position': 21, 'offset': 1, 'region': ''}, {'position': 22, 'offset': 0, 'region': ''}, {'position': 1, 'offset': 0, 'region': 
'd'}, + {'position': 0, 'offset': 1, 'region': 'd'}, ], ) From 49ecb5f849b65d2e3403cb9b51646a4364f5cc99 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Thu, 26 Mar 2026 12:34:19 +0100 Subject: [PATCH 103/127] Add degenerate for NonCoding --- mutalyzer_crossmapper/crossmapper.py | 48 +++++++++++----------------- tests/test_crossmapper.py | 6 ++++ 2 files changed, 24 insertions(+), 30 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 6ad0140..2838da7 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -85,33 +85,6 @@ def __init__(self, locations: list[tuple[int,int]], cds: tuple[int,int], inverte self._coding = (b0['position'] + b0['offset'], b1['position'] + b1['offset'] + 1) self._exons = (e0['position'], e1['position']) - def _degenerate_position(self, pos_m: dict) -> dict: - """Degenerate a coding position model (c./r.). - - :arg dict pos_m: Coding position model. - - :returns dict: a generate coding position model. - """ - region = pos_m['region'] - if region not in ('u', 'd'): - return pos_m - - degenerate_pos_m = {'offset': pos_m['offset']} - location = pos_m['position'] - if region == 'u': - if self._inverted: - degenerate_pos_m['position'] = location + self._exons[1] - self._coding[1] + 1 - else: - degenerate_pos_m['position'] = location + self._coding[0] - degenerate_pos_m['region'] = '-' - if region == 'd': - if self._inverted: - degenerate_pos_m['position'] = location + self._coding[0] - else: - degenerate_pos_m['position'] = location + self._exons[1]- self._coding[1] + 1 - degenerate_pos_m['region'] = '*' - return degenerate_pos_m - def _coordinate_to_coding(self, coordinate: int) -> dict: """Convert a coordinate to a coding position (c./r.). 
@@ -154,10 +127,25 @@ def coordinate_to_coding(self, coordinate: tuple[int, int], degenerate: bool=Fal """ pos_m = self._coordinate_to_coding(coordinate) - if degenerate: - pos_m = self._degenerate_position(pos_m) + region = pos_m['region'] + if not degenerate or region =='': + return pos_m - return pos_m + degenerate_pos_m = {'offset': pos_m['offset']} + location = pos_m['position'] + if region == 'u': + if self._inverted: + degenerate_pos_m['position'] = location + self._exons[1] - self._coding[1] + 1 + else: + degenerate_pos_m['position'] = location + self._coding[0] + degenerate_pos_m['region'] = '-' + if region == 'd': + if self._inverted: + degenerate_pos_m['position'] = location + self._coding[0] + else: + degenerate_pos_m['position'] = location + self._exons[1]- self._coding[1] + 1 + degenerate_pos_m['region'] = '*' + return degenerate_pos_m def _coding_to_coordinate(self, pos_m: dict) -> int: """Convert a coding position (c./r.) to a coordinate. diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index 7265507..8f40840 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -107,6 +107,8 @@ def test_NonCoding_degenerate(): [ {'position': 1, 'offset': -1, 'region': ''}, {'position': 1, 'offset': 0, 'region': 'u'}, + {'position': 1, 'offset': 0, 'region': '-'}, + {'position': 2, 'offset': 1, 'region': '-'}, ], ) @@ -118,6 +120,8 @@ def test_NonCoding_degenerate(): {'position': 1, 'offset': 0, 'region': 'd'}, {'position': 22, 'offset': 1, 'region': ''}, {'position': 23, 'offset': 0, 'region': ''}, + {'position': 24, 'offset': -1, 'region': ''}, + {'position': 1, 'offset': 0, 'region': '*'}, ], ) @@ -133,6 +137,7 @@ def test_NonCoding_inverted_degenerate(): [ {'position': 1, 'offset': -1, 'region': ''}, {'position': 1, 'offset': 0, 'region': 'u'}, + {'position': 1, 'offset': 0, 'region': '-'}, ], ) @@ -142,6 +147,7 @@ def test_NonCoding_inverted_degenerate(): 4, [ {'position': 1, 'offset': 0, 'region': 'd'}, + {'position': 1, 
'offset': 0, 'region': '*'}, {'position': 23, 'offset': 0, 'region': ''}, {'position': 22, 'offset': 1, 'region': ''}, ], From 78491956aeedafbc359b563b6578eb5bc8e5021e Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Thu, 26 Mar 2026 15:46:18 +0100 Subject: [PATCH 104/127] Add tests for degenerate in NonCoding --- mutalyzer_crossmapper/crossmapper.py | 26 +++++++++------ tests/test_crossmapper.py | 47 +++++++++++++++++++++++++++- 2 files changed, 63 insertions(+), 10 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 2838da7..2fbe449 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -33,7 +33,7 @@ def __init__(self, locations: list[tuple[int, int]], inverted: bool = False) -> self._noncoding = MultiLocus(locations, inverted) - def coordinate_to_noncoding(self, coordinate: int) -> dict: + def coordinate_to_noncoding(self, coordinate: int, degenerate: bool=False) -> dict: """Convert a coordinate to a noncoding position (n./r.). :arg int coordinate: Coordinate. @@ -41,11 +41,19 @@ def coordinate_to_noncoding(self, coordinate: int) -> dict: :returns dict: Noncoding position model. """ pos_m = self._noncoding.to_position(coordinate) - if pos_m['region'] == '': + region = pos_m['region'] + if region == '': pos_m['position'] = pos_m['position'] + 1 + return pos_m + + if degenerate: + if region == 'u': + pos_m["region"] = '-' + elif region == 'd': + pos_m['region'] = '*' return pos_m - def noncoding_to_coordinate(self, pos_m: dict, degenerate: bool=True) -> int: + def noncoding_to_coordinate(self, pos_m: dict) -> int: """Convert a noncoding position (n./r.) to a coordinate. :arg dict pos_m: Noncoding position model. @@ -53,13 +61,13 @@ def noncoding_to_coordinate(self, pos_m: dict, degenerate: bool=True) -> int: :returns int: Coordinate. 
""" multilocus_pos_m = {**pos_m} - if degenerate: - if multilocus_pos_m["region"] == '-': - multilocus_pos_m["region"] = 'u' - elif multilocus_pos_m["region"] == '*': - multilocus_pos_m['region'] = 'd' - if multilocus_pos_m['region'] == '': + region = multilocus_pos_m['region'] + if region == '': multilocus_pos_m['position'] = pos_m['position'] - 1 + elif region == '-': + multilocus_pos_m['region'] = 'u' + elif region == '*': + multilocus_pos_m['region'] = 'd' return self._noncoding.to_coordinate(multilocus_pos_m) diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index 8f40840..33aec12 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -154,6 +154,52 @@ def test_NonCoding_inverted_degenerate(): ) +def test_NonCoding_degenerate_return(): + crossmap = NonCoding(_exons) + + assert crossmap.coordinate_to_noncoding(4, True) == { + 'position': 1, + 'offset': 0, + 'region': '-', + } + + assert crossmap.coordinate_to_noncoding(72, True) == { + 'position': 1, + 'offset': 0, + 'region': '*', + } + + +def test_NonCoding_inverted_degenerate_return(): + crossmap = NonCoding(_exons, True) + + assert crossmap.coordinate_to_noncoding(72, True) == { + 'position': 1, + 'offset': 0, + 'region': '-', + } + + assert crossmap.coordinate_to_noncoding(4, True) == { + 'position': 1, + 'offset': 0, + 'region': '*', + } + + +def test_NonCoding_degenerate_no_return(): + """Degenerate internal positions do not exist.""" + crossmap = NonCoding(_exons) + + assert crossmap.coordinate_to_noncoding(25) == crossmap.coordinate_to_noncoding(25, True) + + +def test_NonCoding_inverted_degenerate_no_return(): + """Degenerate internal positions do not exist.""" + crossmap = NonCoding(_exons, True) + + assert crossmap.coordinate_to_noncoding(25) == crossmap.coordinate_to_noncoding(25, True) + + def test_Coding(): """Forward oriented coding transcript.""" crossmap = Coding(_exons, _cds) @@ -544,7 +590,6 @@ def test_Coding_inverted_degenerate_return(): """Degenerate 
upstream and downstream positions may be returned.""" crossmap = Coding([(10, 20)], (11, 19), True) - assert crossmap.coordinate_to_coding(20, True) == { 'position': 2, 'offset': 0, From 7c3cdde5d1efd9b399fddd6904e9f592f2b847e6 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Thu, 26 Mar 2026 15:46:48 +0100 Subject: [PATCH 105/127] Update document --- docs/library.rst | 161 +++++++++++++++++++++++++---------------------- 1 file changed, 86 insertions(+), 75 deletions(-) diff --git a/docs/library.rst b/docs/library.rst index a069719..564d9cd 100644 --- a/docs/library.rst +++ b/docs/library.rst @@ -18,11 +18,11 @@ They are represented as 1-key dictionaries. Below is an example of ``g.1`` in HG .. code-block:: python - {'position':1} + {'position': 1} Where: -- **position**: an integer repersenting a base position (>0) +- **position**: an integer representing a nucleotide position (>0) Genomic Position Conversion ~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -38,8 +38,8 @@ used to convert to and from genomic positions. .. code:: python >>> crossmap.coordinate_to_genomic(0) - 1 - >>> crossmap.genomic_to_coordinate({'position':1}) + {'position': 1} + >>> crossmap.genomic_to_coordinate({'position': 1}) 0 See section :doc:`api/crossmap` for a detailed description. @@ -68,11 +68,11 @@ as 3-key dictionaries. Below is an example of ``n.14+1`` in HGVS. Where: -- **position**: an interger representing a transcript position (>0) +- **position**: an integer representing a transcript position (>0) - **offset**: an integer indicating the offset relative to the position (negative for upstream, positive for downstream) -- **region**: a string describing the region type (``''`` for standard, ``'u'`` for upstream, - ``'d'`` for downstream) +- **region**: a string describing the region type (empty for positions within a non-coding transcript, ``u`` for upstream, + ``d`` for downstream) NonCoding Position Conversion ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -92,24 +92,35 @@ position ``n.14+1``. 
We can convert between these two as follows. .. code:: python >>> crossmap.coordinate_to_noncoding(35) - {'position':14, 'offset':1, 'region':''} - >>> crossmap.noncoding_to_coordinate({'position':14, 'offset':1, 'region':''}) - {'position':14, 'offset':1, 'region':''} + {'position': 14, 'offset': 1, 'region': ''} + >>> crossmap.noncoding_to_coordinate({'position': 14, 'offset': 1, 'region': ''}) + 35 -When the coordinate is upstream or downstream of the transcript, we use ``'u'`` to -present upstream and ``'d'`` to present downstream. +When the coordinate is upstream or downstream of the transcript, we use ``u`` to +present upstream and ``d`` to present downstream. .. code:: python >>> crossmap.coordinate_to_noncoding(2) - {'position':3, 'offset':0, 'region':'u'} - >>> crossmap.noncoding_to_coordinate({'position':3, 'offset':0, 'region':'u'}) + {'position': 3, 'offset': 0, 'region': 'u'} + >>> crossmap.noncoding_to_coordinate({'position': 3, 'offset': 0, 'region': 'u'}) 2 >>> crossmap.coordinate_to_noncoding(73) - {'position':2, 'offset':0, 'region':'d'} - >>> crossmap.noncoding_to_coordinate({'position':2, 'offset':0, 'region':'d'}) + {'position': 2, 'offset': 0, 'region': 'd'} + >>> crossmap.noncoding_to_coordinate({'position': 2, 'offset': 0, 'region': 'd'}) 73 +The ``coordinate_to_noncoding()`` function accepts an optional ``degenerate`` +argument. When set to ``True``, positions outside of the transcript are no +longer described using the ``u`` or ``d`` notation, ``-`` and ``*``are used +instead. + +.. code:: python + + >>> crossmap.coordinate_to_noncoding(2) + {'position': 3, 'offset': 0, 'region': 'u'} + >>> crossmap.coordinate_to_noncoding(2, True) + {'position': 3, 'offset': 0, 'region': '-'} For transcripts that reside on the reverse complement strand, the ``inverted`` parameter should be set to ``True``. In our example, HGVS position ``g.36`` @@ -119,8 +130,8 @@ parameter should be set to ``True``. 
In our example, HGVS position ``g.36`` >>> crossmap = NonCoding(exons, inverted=True) >>> crossmap.coordinate_to_noncoding(35) - {'position':9, 'offset':-1, 'region':''} - >>> crossmap.noncoding_to_coordinate({'position':9, 'offset':-1, 'region':''}) + {'position': 9, 'offset': -1, 'region': ''} + >>> crossmap.noncoding_to_coordinate({'position': 9, 'offset': -1, 'region': ''}) 35 In the following table, we show a number of annotated examples. @@ -137,12 +148,12 @@ In the following table, we show a number of annotated examples. * - 0 - 5 - 0 - - ``'u'`` + - ``u`` - ``n.u5`` * - 4 - 1 - 0 - - ``'u'`` + - ``u`` - ``n.u1`` * - 5 - 1 @@ -167,12 +178,12 @@ In the following table, we show a number of annotated examples. * - 72 - 1 - 0 - - ``'d'`` + - ``d`` - ``n.d1`` * - 79 - 8 - 0 - - ``'d'`` + - ``d`` - ``n.d8`` See section :doc:`api/crossmap` for a detailed description. @@ -186,7 +197,7 @@ positioning systems should be done via a coordinate. Coding Position Model ~~~~~~~~~~~~~~~~~~~~~ -Coding positions follow the HGVS ``c`` coordinate system. They are +Coding positions follow the HGVS ``c.`` coordinate system. They are represented as 3-key dictionaries. Here is an example of ``c.*1+3``. .. code-block:: python @@ -199,11 +210,11 @@ represented as 3-key dictionaries. Here is an example of ``c.*1+3``. 
Where: -- **position**: an interger representing a transcript position (>0) +- **position**: an integer representing a transcript position (>0) - **offset**: an integer indicating the offset relative to the position (negative for upstream, positive for downstream) -- **region**: a string describing the region type (``''`` for standard coding positions, - ``'-'`` for 5' UTR, ``'*'`` for 3' UTR, ``'u'`` for upstream and ``'d'`` for downstream) +- **region**: a string describing the region type (empty for positions within coding DNA sequence, + ``-`` for 5' UTR, ``*`` for 3' UTR, ``u`` for upstream and ``d`` for downstream) Coding Position Conversion ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -225,21 +236,21 @@ position ``c.-1``. We can convert between these two as follows. .. code:: python >>> crossmap.coordinate_to_coding(31) - {'position':1, 'offset':0, 'region':'-'} - >>> crossmap.coding_to_coordinate({'position':1, 'offset':0, 'region':'-'}) + {'position': 1, 'offset': 0, 'region': '-'} + >>> crossmap.coding_to_coordinate({'position': 1, 'offset': 0, 'region': '-'}) 31 The ``coordinate_to_coding()`` function accepts an optional ``degenerate`` argument. When set to ``True``, positions outside of the transcript are no -longer described using the ``'u'`` or ``'d'`` notation, ``'-'`` and ``'*'`` -are used instead. +longer described using the ``u`` or ``d`` notation, ``-`` and ``*``are used +instead. Note that the value of `position` is adjusted accordingly. .. code:: python >>> crossmap.coordinate_to_coding(4) - {'position':1, 'offset':0, 'region':'u'} + {'position': 1, 'offset': 0, 'region': 'u'} >>> crossmap.coordinate_to_coding(4, True) - {'position':12, 'offset':0, 'region':'-'} + {'position': 12, 'offset': 0, 'region': '-'} In the following table, we show a number of annotated examples. @@ -255,27 +266,27 @@ In the following table, we show a number of annotated examples. 
* - 0 - 5 - 0 - - ``'u'`` + - ``u`` - ``c.u5`` * - 4 - 1 - 0 - - ``'u'`` + - ``u`` - ``c.u1`` * - 5 - 11 - 0 - - ``'-'`` + - ``-`` - ``c.-11`` * - 24 - 3 - 5 - - ``'-'`` + - ``-`` - ``c.-3+5`` * - 31 - 1 - 0 - - ``'-'`` + - ``-`` - ``c.-1`` * - 32 - 1 @@ -295,27 +306,27 @@ In the following table, we show a number of annotated examples. * - 43 - 1 - 0 - - ``'*'`` + - ``*`` - ``c.*1`` * - 61 - 4 - -9 - - ``'*'`` + - ``*`` - ``c.*4-9`` * - 71 - 5 - 0 - - ``'*'`` + - ``*`` - ``c.*5`` * - 72 - 1 - 0 - - ``'d'`` + - ``d`` - ``c.d1`` * - 79 - 8 - 0 - - ``'d'`` + - ``d`` - ``c.d8`` @@ -324,24 +335,24 @@ Protein Additionally, the functions ``coordinate_to_protein()`` and ``protein_to_coordinate()`` can be used. These functions use a 4-key dictionary -to represent a protein position. Here is an example of ``p.1`` in HGVS. +to represent a protein position. Here is one example of three posibilities +for ``p.1`` in HGVS. .. code-block:: python { 'position': 1, 'position_in_codon': 3, - 'offset': 3, + 'offset': 0, 'region': '' } Where: -- **position**: an interger representing the protein position (>0) +- **position**: an integer representing an amino acid position (>0) - **position_in_codon**: an integer indicating the nucleotide index within the codon (1, 2, or 3) -- **offset**: an integer indicating offset relative to the codon -- **region**: a string describing the region type (``''`` for standard positions) - +- **offset**: an integer indicating offset relative to the nucleotide specified by `position_in_codon` in the codon +- **region**: a string describing the region type (empty for vaid amino acid positions) In our example the HGVS position ``g.42`` (coordinate `41`) corresponds with position ``p.2``. We can convert between these to as follows. @@ -349,12 +360,12 @@ position ``p.2``. We can convert between these to as follows. .. 
code:: python >>> crossmap.coordinate_to_protein(41) - {'position':2, 'position_in_codon':2, 'offset':0, 'region':''} - >>> crossmap.protein_to_coordinate({'position':2, 'position_in_codon':2, 'offset':0, 'region':''}) + {'position': 2, 'position_in_codon': 2, 'offset': 0, 'region': ''} + >>> crossmap.protein_to_coordinate({'position': 2, 'position_in_codon': 2, 'offset': 0, 'region': ''}) 41 **Note:** protein position only corresponds with the HGVS "p." notation -when the offset equals ``0`` and the region equals ``''``. In the following +when the offset equals ``0`` and the region equals empty. In the following table, we show a number of annotated examples. .. _table_protein: @@ -371,26 +382,26 @@ table, we show a number of annotated examples. - 2 - 2 - 0 - - ``'u'`` - - invalid + - ``u`` + - * - 4 - 1 - 3 - 0 - - ``'u'`` - - invalid + - ``u`` + - * - 5 - 4 - 2 - 0 - - ``'-'`` - - invalid + - ``-`` + - * - 31 - 1 - 3 - 0 - - ``'-'`` - - invalid + - ``-`` + - * - 32 - 1 - 1 @@ -414,7 +425,7 @@ table, we show a number of annotated examples. - 3 - 1 - - - ``p.1`` + - * - 42 - 2 - 3 @@ -425,27 +436,27 @@ table, we show a number of annotated examples. - 1 - 1 - 0 - - ``'*'`` - - invalid + - ``*`` + - * - 44 - 1 - 1 - 1 - - ``'*'`` - - invalid + - ``*`` + - * - 72 - 1 - 1 - 0 - - ``'d'`` - - invalid + - ``d`` + - * - 79 - 2 - 2 - 0 - - ``'d'`` - - invalid + - ``d`` + - See section :doc:`api/crossmap` for a detailed description. @@ -506,9 +517,9 @@ This differs from HGVS numbering, which is **1-based**. .. code:: python >>> locus.to_position(9) - {'position':0, 'offset':-1} - >>> locus.to_coordinate({'position':0, 'offset':-1}) - {'position':0, 'offset':-1} + {'position': 0, 'offset': -1} + >>> locus.to_coordinate({'position': 0, 'offset': -1}) + 9 For loci that reside on the reverse complement strand, the optional ``inverted`` constructor parameter should be set to ``True``. @@ -534,12 +545,12 @@ The interface to this class is similar to that of the ``Locus`` class. 
Functions .. code:: python >>> multilocus.to_position(22) - {'position':9, 'offset':3, 'region':''} - >>> multilocus.to_coordinate({'position':9, 'offset':3, 'region':''}) + {'position': 9, 'offset': 3, 'region': ''} + >>> multilocus.to_coordinate({'position': 9, 'offset': 3, 'region': ''}) 22 >>> multilocus.to_position(38) - {'position':10, 'offset':-2, 'region':''} - >>> multilocus.to_coordinate({'position':10, 'offset':-2, 'region':''} + {'position': 10, 'offset': -2, 'region': ''} + >>> multilocus.to_coordinate({'position': 10, 'offset': -2, 'region': ''}) 38 See section :doc:`api/multi_locus` for a detailed description. From 651d1700881835eed4f6b34a5ae4faf32c957165 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Thu, 26 Mar 2026 15:56:28 +0100 Subject: [PATCH 106/127] Formatting --- README.rst | 14 +++++++------- docs/library.rst | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.rst b/README.rst index fd0308c..ebd1ed6 100644 --- a/README.rst +++ b/README.rst @@ -53,8 +53,8 @@ positions and coordinates. >>> from mutalyzer_crossmapper import Genomic >>> crossmap = Genomic() >>> crossmap.coordinate_to_genomic(0) - 1 - >>> crossmap.genomic_to_coordinate({'position':1}) + {'position': 1} + >>> crossmap.genomic_to_coordinate({'position': 1}) 0 On top of the functionality provided by the ``Genomic`` class, the @@ -67,8 +67,8 @@ positions and coordinates. >>> exons = [(5, 8), (14, 20), (30, 35), (40, 44), (50, 52), (70, 72)] >>> crossmap = NonCoding(exons) >>> crossmap.coordinate_to_noncoding(35) - {'position':14, 'offset':1, 'region':''} - >>> crossmap.noncoding_to_coordinate({'position':14, 'offset':1, 'region':''}) + {'position': 14, 'offset': 1, 'region': ''} + >>> crossmap.noncoding_to_coordinate({'position': 14, 'offset': 1, 'region': ''}) 35 Add the flag ``inverted=True`` to the constructor when the transcript resides @@ -84,7 +84,7 @@ coordinates as well as conversions between protein positions and coordinates. 
>>> cds = (32, 43) >>> crossmap = Coding(exons, cds) >>> crossmap.coordinate_to_coding(31) - {'position':1, 'offset':0, 'region':'-'} + {'position': 1, 'offset': 0, 'region': '-'} >>> crossmap.coding_to_coordinate({'position':1, 'offset':0, 'region':'-'}) 31 @@ -96,8 +96,8 @@ Conversions between protein positions and coordinates are done as follows. .. code:: python >>> crossmap.coordinate_to_protein(41) - {'position':2, 'position_in_codon': 2, 'offset':0, 'region':''} - >>> crossmap.protein_to_coordinate({'position':2, 'position_in_codon':2, 'offset':0, 'region':''}) + {'position': 2, 'position_in_codon': 2, 'offset': 0, 'region': ''} + >>> crossmap.protein_to_coordinate({'position': 2, 'position_in_codon': 2, 'offset': 0, 'region': ''}) 41 diff --git a/docs/library.rst b/docs/library.rst index 564d9cd..bc982a0 100644 --- a/docs/library.rst +++ b/docs/library.rst @@ -8,7 +8,7 @@ The ``Genomic`` class --------------------- The ``Genomic`` class provides an interface to conversions between genomic -(``g.``, ``m.``, ``n.``) positions and coordinates. +(``g.``, ``m.``, ``o.``) positions and coordinates. 
Genomic Position Model ~~~~~~~~~~~~~~~~~~~~~~~ From 21f84582e8ae16f04706a7f22265f4ed0455c63d Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Thu, 26 Mar 2026 16:08:51 +0100 Subject: [PATCH 107/127] Update library.rst --- docs/library.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/library.rst b/docs/library.rst index bc982a0..4264ff9 100644 --- a/docs/library.rst +++ b/docs/library.rst @@ -71,8 +71,8 @@ Where: - **position**: an integer representing a transcript position (>0) - **offset**: an integer indicating the offset relative to the position (negative for upstream, positive for downstream) -- **region**: a string describing the region type (empty for positions within a non-coding transcript, ``u`` for upstream, - ``d`` for downstream) +- **region**: a string describing the region type (empty for positions within a non-coding + transcript, ``u`` for upstream, ``d`` for downstream) NonCoding Position Conversion ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -112,7 +112,7 @@ present upstream and ``d`` to present downstream. The ``coordinate_to_noncoding()`` function accepts an optional ``degenerate`` argument. When set to ``True``, positions outside of the transcript are no -longer described using the ``u`` or ``d`` notation, ``-`` and ``*``are used +longer described using the ``u`` or ``d`` notation, ``-`` and ``*`` are used instead. .. code:: python @@ -242,7 +242,7 @@ position ``c.-1``. We can convert between these two as follows. The ``coordinate_to_coding()`` function accepts an optional ``degenerate`` argument. When set to ``True``, positions outside of the transcript are no -longer described using the ``u`` or ``d`` notation, ``-`` and ``*``are used +longer described using the ``u`` or ``d`` notation, ``-`` and ``*`` are used instead. Note that the value of `position` is adjusted accordingly. .. 
code:: python From 34d084479060863c305b580edf7d7d9f7e764d8d Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Thu, 26 Mar 2026 16:21:40 +0100 Subject: [PATCH 108/127] Fix typo --- docs/library.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/library.rst b/docs/library.rst index 4264ff9..ec1bdc2 100644 --- a/docs/library.rst +++ b/docs/library.rst @@ -71,7 +71,7 @@ Where: - **position**: an integer representing a transcript position (>0) - **offset**: an integer indicating the offset relative to the position (negative for upstream, positive for downstream) -- **region**: a string describing the region type (empty for positions within a non-coding +- **region**: a string describing the region type (empty for positions within a non-coding transcript, ``u`` for upstream, ``d`` for downstream) NonCoding Position Conversion @@ -243,7 +243,7 @@ position ``c.-1``. We can convert between these two as follows. The ``coordinate_to_coding()`` function accepts an optional ``degenerate`` argument. When set to ``True``, positions outside of the transcript are no longer described using the ``u`` or ``d`` notation, ``-`` and ``*`` are used -instead. Note that the value of `position` is adjusted accordingly. +instead. Note that the value of ``position`` is adjusted accordingly. .. 
code:: python From 8873a779365da276273a33c7e24e764f467ba166 Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Thu, 26 Mar 2026 16:23:16 +0100 Subject: [PATCH 109/127] Update .gitignore --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 324acae..381420f 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,3 @@ docs/_build/ mutalyzer_crossmapper.egg-info/ mutalyzer_crossmapper/__pycache__/ tests/__pycache__/ -tmp \ No newline at end of file From c9e58c67df7c26e82c73c4d9e3fcf329d3d7ff4f Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 27 Mar 2026 09:44:09 +0100 Subject: [PATCH 110/127] Set upstream and downstream in multilocus as 0 based --- mutalyzer_crossmapper/crossmapper.py | 24 +++++++++++++----------- mutalyzer_crossmapper/multi_locus.py | 10 +++++----- tests/test_multi_locus.py | 20 ++++++++------------ 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 2fbe449..63cee23 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -42,8 +42,8 @@ def coordinate_to_noncoding(self, coordinate: int, degenerate: bool=False) -> di """ pos_m = self._noncoding.to_position(coordinate) region = pos_m['region'] + pos_m['position'] = pos_m['position'] + 1 if region == '': - pos_m['position'] = pos_m['position'] + 1 return pos_m if degenerate: @@ -62,9 +62,8 @@ def noncoding_to_coordinate(self, pos_m: dict) -> int: """ multilocus_pos_m = {**pos_m} region = multilocus_pos_m['region'] - if region == '': - multilocus_pos_m['position'] = pos_m['position'] - 1 - elif region == '-': + multilocus_pos_m['position'] = multilocus_pos_m['position'] - 1 + if region == '-': multilocus_pos_m['region'] = 'u' elif region == '*': multilocus_pos_m['region'] = 'd' @@ -103,6 +102,7 @@ def _coordinate_to_coding(self, coordinate: int) -> dict: noncoding_pos_m = 
self._noncoding.to_position(coordinate) if noncoding_pos_m['region'] in ('u', 'd'): + noncoding_pos_m['position'] = noncoding_pos_m['position'] + 1 return noncoding_pos_m location = noncoding_pos_m['position'] @@ -136,7 +136,7 @@ def coordinate_to_coding(self, coordinate: tuple[int, int], degenerate: bool=Fal pos_m = self._coordinate_to_coding(coordinate) region = pos_m['region'] - if not degenerate or region =='': + if not degenerate or region == '': return pos_m degenerate_pos_m = {'offset': pos_m['offset']} @@ -164,19 +164,21 @@ def _coding_to_coordinate(self, pos_m: dict) -> int: """ location = pos_m['position'] region = pos_m['region'] + multilocus_pos_m = {**pos_m} if region in ('u', 'd'): - return self._noncoding.to_coordinate(pos_m) + multilocus_pos_m['position'] = location - 1 + return self._noncoding.to_coordinate(multilocus_pos_m) - noncoding_pos_m = {'offset': pos_m['offset'], 'region': ''} + multilocus_pos_m['region'] = '' if region == '': - noncoding_pos_m['position'] = location + self._coding[0] - 1 + multilocus_pos_m['position'] = location + self._coding[0] - 1 elif region == '-': - noncoding_pos_m['position'] = self._coding[0] - location + multilocus_pos_m['position'] = self._coding[0] - location else: - noncoding_pos_m['position'] = self._coding[1] + location - 1 + multilocus_pos_m['position'] = self._coding[1] + location - 1 - return self._noncoding.to_coordinate(noncoding_pos_m) + return self._noncoding.to_coordinate(multilocus_pos_m) def coding_to_coordinate(self, pos_m: dict) -> int: """Convert a coding position (c./r.) to a coordinate. 
diff --git a/mutalyzer_crossmapper/multi_locus.py b/mutalyzer_crossmapper/multi_locus.py index a60e6da..6984c97 100644 --- a/mutalyzer_crossmapper/multi_locus.py +++ b/mutalyzer_crossmapper/multi_locus.py @@ -63,7 +63,7 @@ def to_position(self, coordinate: int) -> dict[str: int | str]: if outside: return { - 'position': abs(locus_pos_m['offset']), + 'position': abs(locus_pos_m['offset']) - 1, 'offset': 0, 'region': region } @@ -84,12 +84,12 @@ def to_coordinate(self, pos_m: dict) -> int: if region == 'u': if self._inverted: - return self._locations[-1][1] + abs(pos_m['position']) - pos_m['offset'] - 1 - return self._locations[0][0] - abs(pos_m['position']) + pos_m['offset'] + return self._locations[-1][1] + abs(pos_m['position']) - pos_m['offset'] + return self._locations[0][0] - abs(pos_m['position']) + pos_m['offset'] - 1 elif region == 'd': if self._inverted: - return self._locations[0][0] - abs(pos_m['position']) - pos_m['offset'] - return self._locations[-1][1] + abs(pos_m['position']) + pos_m['offset'] - 1 + return self._locations[0][0] - abs(pos_m['position']) - pos_m['offset'] - 1 + return self._locations[-1][1] + abs(pos_m['position']) + pos_m['offset'] index = min( len(self._offsets), diff --git a/tests/test_multi_locus.py b/tests/test_multi_locus.py index 99c7e50..9725ad7 100644 --- a/tests/test_multi_locus.py +++ b/tests/test_multi_locus.py @@ -35,7 +35,7 @@ def test_MultiLocus(): multi_locus.to_position, 4, multi_locus.to_coordinate, - {'position': 1, 'offset': 0, 'region': 'u'}, + {'position': 0, 'offset': 0, 'region': 'u'}, ) invariant( @@ -94,7 +94,7 @@ def test_MultiLocus(): multi_locus.to_position, 72, multi_locus.to_coordinate, - {'position': 1, 'offset': 0, 'region': 'd'}, + {'position': 0, 'offset': 0, 'region': 'd'}, ) @@ -107,7 +107,7 @@ def test_MultiLocus_inverted(): multi_locus.to_position, 72, multi_locus.to_coordinate, - {'position': 1, 'offset': 0, 'region': 'u'}, + {'position': 0, 'offset': 0, 'region': 'u'}, ) invariant( 
multi_locus.to_position, @@ -165,7 +165,7 @@ def test_MultiLocus_inverted(): multi_locus.to_position, 4, multi_locus.to_coordinate, - {'position': 1, 'offset': 0, 'region': 'd'}, + {'position': 0, 'offset': 0, 'region': 'd'}, ) @@ -287,8 +287,7 @@ def test_MultiLocus_degenerate(): [ {'position': 0, 'offset': -1, 'region': ''}, {'position': -1, 'offset': 0, 'region': ''}, - {'position': 1, 'offset': 0, 'region': 'u'}, - {'position': -1, 'offset': 0, 'region': 'u'}, + {'position': 0, 'offset': 0, 'region': 'u'}, ], ) @@ -298,8 +297,7 @@ def test_MultiLocus_degenerate(): [ {'position': 21, 'offset': 1, 'region': ''}, {'position': 22, 'offset': 0, 'region': ''}, - {'position': 0, 'offset': 1, 'region': 'd'}, - {'position': 1, 'offset': 0, 'region': 'd'}, + {'position': 0, 'offset': 0, 'region': 'd'}, ], ) @@ -312,10 +310,9 @@ def test_MultiLocus_inverted_degenerate(): multi_locus.to_coordinate, 72, [ - {'position': 0, 'offset': -1, 'region': 'u'}, {'position': -1, 'offset': 0, 'region': ''}, {'position': 0, 'offset': -1, 'region': ''}, - {'position': 1, 'offset': 0, 'region': 'u'}, + {'position': 0, 'offset': 0, 'region': 'u'}, ], ) @@ -325,7 +322,6 @@ def test_MultiLocus_inverted_degenerate(): [ {'position': 21, 'offset': 1, 'region': ''}, {'position': 22, 'offset': 0, 'region': ''}, - {'position': 1, 'offset': 0, 'region': 'd'}, - {'position': 0, 'offset': 1, 'region': 'd'}, + {'position': 0, 'offset': 0, 'region': 'd'}, ], ) From 783223d90ef46422bc14d4e016ad2e06c2ea53a7 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Fri, 27 Mar 2026 11:09:03 +0100 Subject: [PATCH 111/127] Fix typing mistake --- mutalyzer_crossmapper/crossmapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 63cee23..6c5a7be 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -125,7 +125,7 @@ def _coordinate_to_coding(self, coordinate: int) -> dict: 
'region': '' } - def coordinate_to_coding(self, coordinate: tuple[int, int], degenerate: bool=False) -> dict: + def coordinate_to_coding(self, coordinate: int, degenerate: bool=False) -> dict: """Convert a coordinate to a coding position (c./r.). :arg int coordinate: Coordinate. From a7196f4a6fb7bbfff0e412a937b8e1428b19a563 Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Fri, 27 Mar 2026 17:57:10 +0100 Subject: [PATCH 112/127] Use importlib instead of pkg_resources --- mutalyzer_crossmapper/__init__.py | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/mutalyzer_crossmapper/__init__.py b/mutalyzer_crossmapper/__init__.py index 284f6c8..7d1abc0 100644 --- a/mutalyzer_crossmapper/__init__.py +++ b/mutalyzer_crossmapper/__init__.py @@ -1,18 +1,4 @@ -"""Crossmapper position conversion library. - -Definitions: - -- Coordinates are zero based, non-negative integers. -- Locations are zero based right-open non-negative integer intervals, - consistent with Python's range() and sequence slicing functions. -- Loci and exons are locations. -- An exon list is a list of locations that, when flattened, is an increasing - sequence. -- A position is a 2-tuple of which the first element is a one based non-zero - integer relative to an element in a location and the second element is an - integer offset relative to the first element. 
-""" -from pkg_resources import get_distribution +from importlib.metadata import metadata from .crossmapper import Coding, Genomic, NonCoding from .location import nearest_location @@ -20,14 +6,13 @@ from .multi_locus import MultiLocus -def _get_metadata(name): - pkg = get_distribution('mutalyzer_crossmapper') - - for line in pkg.get_metadata_lines(pkg.PKG_INFO): - if line.startswith('{}: '.format(name)): - return line.split(': ')[1] - - return '' +def _get_metadata(name: str) -> str: + """Get metadata from the package using importlib.metadata""" + try: + meta = metadata('mutalyzer_crossmapper') + return meta.get(name, '') + except Exception: + return '' _copyright_notice = 'Copyright (c) {} <{}>'.format( From 88ce74f5bcc0f66d4f074e84fbe43752d7ddab64 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Mon, 30 Mar 2026 11:40:58 +0200 Subject: [PATCH 113/127] Fix typing and single quote --- mutalyzer_crossmapper/crossmapper.py | 31 +++++++++++++++------------- mutalyzer_crossmapper/locus.py | 4 ++-- mutalyzer_crossmapper/multi_locus.py | 4 ++-- 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 6c5a7be..ac4ef65 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -24,7 +24,7 @@ def genomic_to_coordinate(self, pos_m: dict) -> int: class NonCoding(Genomic): """NonCoding crossmap object.""" - def __init__(self, locations: list[tuple[int, int]], inverted: bool = False) -> None: + def __init__(self, locations: list[tuple[int, int]], inverted: bool=False) -> None: """ :arg list locations: List of locus locations. :arg bool inverted: Orientation. 
@@ -48,31 +48,34 @@ def coordinate_to_noncoding(self, coordinate: int, degenerate: bool=False) -> di if degenerate: if region == 'u': - pos_m["region"] = '-' + pos_m['region'] = '-' elif region == 'd': pos_m['region'] = '*' return pos_m - def noncoding_to_coordinate(self, pos_m: dict) -> int: + def noncoding_to_coordinate(self, pos_m: dict[str, int | str]) -> int: """Convert a noncoding position (n./r.) to a coordinate. :arg dict pos_m: Noncoding position model. :returns int: Coordinate. """ - multilocus_pos_m = {**pos_m} - region = multilocus_pos_m['region'] - multilocus_pos_m['position'] = multilocus_pos_m['position'] - 1 - if region == '-': + multilocus_pos_m = {**pos_m, 'position': pos_m['position'] - 1} + if multilocus_pos_m['region'] == '-': multilocus_pos_m['region'] = 'u' - elif region == '*': + elif multilocus_pos_m['region'] == '*': multilocus_pos_m['region'] = 'd' return self._noncoding.to_coordinate(multilocus_pos_m) class Coding(NonCoding): """Coding crossmap object.""" - def __init__(self, locations: list[tuple[int,int]], cds: tuple[int,int], inverted : bool=False) -> None: + def __init__( + self, + locations: list[tuple[int,int]], + cds: tuple[int,int], + inverted : bool=False + ) -> None: """ :arg list locations: List of locus locations. :arg tuple cds: Locus location. @@ -92,7 +95,7 @@ def __init__(self, locations: list[tuple[int,int]], cds: tuple[int,int], inverte self._coding = (b0['position'] + b0['offset'], b1['position'] + b1['offset'] + 1) self._exons = (e0['position'], e1['position']) - def _coordinate_to_coding(self, coordinate: int) -> dict: + def _coordinate_to_coding(self, coordinate: int) -> dict[str, int | str]: """Convert a coordinate to a coding position (c./r.). :arg int coordinate: Coordinate. 
@@ -155,7 +158,7 @@ def coordinate_to_coding(self, coordinate: int, degenerate: bool=False) -> dict: degenerate_pos_m['region'] = '*' return degenerate_pos_m - def _coding_to_coordinate(self, pos_m: dict) -> int: + def _coding_to_coordinate(self, pos_m: dict[str, int | str]) -> int: """Convert a coding position (c./r.) to a coordinate. :arg dict pos_m: Coding position model (c./r.). @@ -180,7 +183,7 @@ def _coding_to_coordinate(self, pos_m: dict) -> int: return self._noncoding.to_coordinate(multilocus_pos_m) - def coding_to_coordinate(self, pos_m: dict) -> int: + def coding_to_coordinate(self, pos_m: dict[str, int | str]) -> int: """Convert a coding position (c./r.) to a coordinate. :arg dict pos_m: Coding position model (c./r.). @@ -190,7 +193,7 @@ def coding_to_coordinate(self, pos_m: dict) -> int: return self._coding_to_coordinate(pos_m) - def coordinate_to_protein(self, coordinate: int) -> dict: + def coordinate_to_protein(self, coordinate: int) -> dict[str, int | str]: """Convert a coordinate to a protein position (p.). :arg int coordinate: Coordinate. @@ -210,7 +213,7 @@ def coordinate_to_protein(self, coordinate: int) -> dict: 'position_in_codon': (location + 2) % 3 + 1, **{k: v for k, v in pos.items() if k != 'position'}} - def protein_to_coordinate(self, pos_m: dict) -> int: + def protein_to_coordinate(self, pos_m: dict[str, int | str]) -> int: """Convert a protein position (p.) to a coordinate. :arg dict position: Protein position model(p.). diff --git a/mutalyzer_crossmapper/locus.py b/mutalyzer_crossmapper/locus.py index fff0c66..d62144d 100644 --- a/mutalyzer_crossmapper/locus.py +++ b/mutalyzer_crossmapper/locus.py @@ -1,6 +1,6 @@ class Locus(object): """Locus object.""" - def __init__(self, location: list[tuple[int, int]], inverted=False) -> None: + def __init__(self, location: tuple[int, int], inverted: bool=False) -> None: """ :arg tuple location: Locus location. :arg bool inverted: Orientation. 
@@ -30,7 +30,7 @@ def to_position(self, coordinate: int) -> dict[str, int]: return {'position': self._end, 'offset': coordinate - self.boundary[1]} return {'position': coordinate - self.boundary[0], 'offset': 0} - def to_coordinate(self, pos_m: dict) -> int: + def to_coordinate(self, pos_m: dict[str, int]) -> int: """Convert a position model to a coordinate. :arg dict position: Position model with 'position' and 'offset' keys. diff --git a/mutalyzer_crossmapper/multi_locus.py b/mutalyzer_crossmapper/multi_locus.py index 6984c97..5f1cee0 100644 --- a/mutalyzer_crossmapper/multi_locus.py +++ b/mutalyzer_crossmapper/multi_locus.py @@ -49,7 +49,7 @@ def outside(self, coordinate: int) -> int: return coordinate - self._loci[-1].boundary[1] return 0 - def to_position(self, coordinate: int) -> dict[str: int | str]: + def to_position(self, coordinate: int) -> dict[str, int | str]: """Convert a coordinate to a position. :arg int coordinate: Coordinate. @@ -73,7 +73,7 @@ def to_position(self, coordinate: int) -> dict[str: int | str]: 'region': region } - def to_coordinate(self, pos_m: dict) -> int: + def to_coordinate(self, pos_m: dict[str, int | str]) -> int: """Convert a position model to a coordinate. :arg dict pos_m: Position model with 'position','offset' and 'region' keys. From 6d5e2827e0d4da6b127892c2ab2c272f6da7bb6a Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Mon, 30 Mar 2026 11:55:45 +0200 Subject: [PATCH 114/127] Fix doc string --- docs/library.rst | 2 +- mutalyzer_crossmapper/crossmapper.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/library.rst b/docs/library.rst index ec1bdc2..6b358f9 100644 --- a/docs/library.rst +++ b/docs/library.rst @@ -530,7 +530,7 @@ The ``MultiLocus`` class ^^^^^^^^^^^^^^^^^^^^^^^^ The ``MultiLocus`` class is used to deal with offsets with respect to multiple -loci. Its positions is +loci. .. 
code:: python diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index ac4ef65..dd0a774 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -3,7 +3,7 @@ class Genomic(object): """Genomic crossmap object.""" - def coordinate_to_genomic(self, coordinate: int) -> dict: + def coordinate_to_genomic(self, coordinate: int) -> dict[str, int]: """Convert a coordinate to a genomic position (g./m./o.). :arg int coordinate: Coordinate. @@ -12,7 +12,7 @@ def coordinate_to_genomic(self, coordinate: int) -> dict: """ return {'position': coordinate + 1} - def genomic_to_coordinate(self, pos_m: dict) -> int: + def genomic_to_coordinate(self, pos_m: dict[str, int]) -> int: """Convert a genomic position (g./m./o.) to a coordinate. :arg dict pos_m: Genomic position model. @@ -216,7 +216,7 @@ def coordinate_to_protein(self, coordinate: int) -> dict[str, int | str]: def protein_to_coordinate(self, pos_m: dict[str, int | str]) -> int: """Convert a protein position (p.) to a coordinate. - :arg dict position: Protein position model(p.). + :arg dict pos_m: Protein position model(p.). :returns int: Coordinate. 
""" From 8fba978d4755cdcfabf6f6f174b453eb5f104d08 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Mon, 30 Mar 2026 12:10:20 +0200 Subject: [PATCH 115/127] Update python version in setup --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index ca7be54..c58c160 100644 --- a/setup.cfg +++ b/setup.cfg @@ -17,6 +17,7 @@ classifiers = [options] packages = find: +python_requires = >=3.10 [options.extras_require] tests = From 2867ea8d5febc79bd86d57f1c8ca18b158a2a345 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Mon, 30 Mar 2026 14:28:54 +0200 Subject: [PATCH 116/127] Rename multilocus positon model --- mutalyzer_crossmapper/crossmapper.py | 45 ++++++++++++++-------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index dd0a774..8ece035 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -40,18 +40,18 @@ def coordinate_to_noncoding(self, coordinate: int, degenerate: bool=False) -> di :returns dict: Noncoding position model. """ - pos_m = self._noncoding.to_position(coordinate) - region = pos_m['region'] - pos_m['position'] = pos_m['position'] + 1 + multilocus_pos_m = self._noncoding.to_position(coordinate) + noncoding_pos_m = {**multilocus_pos_m, 'position': multilocus_pos_m['position'] + 1} + region = noncoding_pos_m['region'] if region == '': - return pos_m + return noncoding_pos_m if degenerate: if region == 'u': - pos_m['region'] = '-' + noncoding_pos_m['region'] = '-' elif region == 'd': - pos_m['region'] = '*' - return pos_m + noncoding_pos_m['region'] = '*' + return noncoding_pos_m def noncoding_to_coordinate(self, pos_m: dict[str, int | str]) -> int: """Convert a noncoding position (n./r.) to a coordinate. 
@@ -84,16 +84,16 @@ def __init__( NonCoding.__init__(self, locations, inverted) b0 = self._noncoding.to_position(cds[0]) - b1 = self._noncoding.to_position(cds[1]-1) + b1 = self._noncoding.to_position(cds[1] - 1) e0 = self._noncoding.to_position(locations[0][0]) - e1 = self._noncoding.to_position(locations[-1][1]-1) + e1 = self._noncoding.to_position(locations[-1][1] - 1) if self._inverted: self._coding = (b1['position'] + b1['offset'], b0['position'] + b0['offset'] + 1) - self._exons = (e1['position'], e0['position']) + self._exons = (e1['position'], e0['position'] + 1) else: self._coding = (b0['position'] + b0['offset'], b1['position'] + b1['offset'] + 1) - self._exons = (e0['position'], e1['position']) + self._exons = (e0['position'], e1['position'] + 1) def _coordinate_to_coding(self, coordinate: int) -> dict[str, int | str]: """Convert a coordinate to a coding position (c./r.). @@ -102,14 +102,13 @@ def _coordinate_to_coding(self, coordinate: int) -> dict[str, int | str]: :returns dict: Coding position model (c./r.). 
""" - noncoding_pos_m = self._noncoding.to_position(coordinate) + multilocus_pos_m = self._noncoding.to_position(coordinate) - if noncoding_pos_m['region'] in ('u', 'd'): - noncoding_pos_m['position'] = noncoding_pos_m['position'] + 1 - return noncoding_pos_m + if multilocus_pos_m['region'] in ('u', 'd'): + return {**multilocus_pos_m, 'position': multilocus_pos_m['position'] + 1} - location = noncoding_pos_m['position'] - offset = noncoding_pos_m['offset'] + location = multilocus_pos_m['position'] + offset = multilocus_pos_m['offset'] if location < self._coding[0]: return { 'position': self._coding[0] - location, @@ -142,11 +141,11 @@ def coordinate_to_coding(self, coordinate: int, degenerate: bool=False) -> dict: if not degenerate or region == '': return pos_m - degenerate_pos_m = {'offset': pos_m['offset']} + degenerate_pos_m = {**pos_m, 'offset': pos_m['offset']} location = pos_m['position'] if region == 'u': if self._inverted: - degenerate_pos_m['position'] = location + self._exons[1] - self._coding[1] + 1 + degenerate_pos_m['position'] = location + self._exons[1] - self._coding[1] else: degenerate_pos_m['position'] = location + self._coding[0] degenerate_pos_m['region'] = '-' @@ -154,7 +153,7 @@ def coordinate_to_coding(self, coordinate: int, degenerate: bool=False) -> dict: if self._inverted: degenerate_pos_m['position'] = location + self._coding[0] else: - degenerate_pos_m['position'] = location + self._exons[1]- self._coding[1] + 1 + degenerate_pos_m['position'] = location + self._exons[1] - self._coding[1] degenerate_pos_m['region'] = '*' return degenerate_pos_m @@ -207,11 +206,13 @@ def coordinate_to_protein(self, coordinate: int) -> dict[str, int | str]: return { 'position': abs(-location // 3), 'position_in_codon': -location % 3 + 1, - **{k: v for k, v in pos.items() if k != 'position'}} + 'region': pos['region'], + 'offset': pos['offset']} return { 'position': (location + 2) // 3, 'position_in_codon': (location + 2) % 3 + 1, - **{k: v for k, v in 
pos.items() if k != 'position'}} + 'region': pos['region'], + 'offset': pos['offset']} def protein_to_coordinate(self, pos_m: dict[str, int | str]) -> int: """Convert a protein position (p.) to a coordinate. From 12229cd508988f85a3cca102005778cb5e76886f Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Mon, 30 Mar 2026 14:30:12 +0200 Subject: [PATCH 117/127] Remove unneccessary elif --- mutalyzer_crossmapper/multi_locus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mutalyzer_crossmapper/multi_locus.py b/mutalyzer_crossmapper/multi_locus.py index 5f1cee0..627a31b 100644 --- a/mutalyzer_crossmapper/multi_locus.py +++ b/mutalyzer_crossmapper/multi_locus.py @@ -86,7 +86,7 @@ def to_coordinate(self, pos_m: dict[str, int | str]) -> int: if self._inverted: return self._locations[-1][1] + abs(pos_m['position']) - pos_m['offset'] return self._locations[0][0] - abs(pos_m['position']) + pos_m['offset'] - 1 - elif region == 'd': + if region == 'd': if self._inverted: return self._locations[0][0] - abs(pos_m['position']) - pos_m['offset'] - 1 return self._locations[-1][1] + abs(pos_m['position']) + pos_m['offset'] From c6455e907f25ffe9c0aa74906af80912bb733558 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Mon, 30 Mar 2026 15:04:05 +0200 Subject: [PATCH 118/127] Fix flake8 whitespace issues --- mutalyzer_crossmapper/crossmapper.py | 12 ++++++------ mutalyzer_crossmapper/location.py | 2 +- mutalyzer_crossmapper/locus.py | 2 +- mutalyzer_crossmapper/multi_locus.py | 2 +- tests/test_crossmapper.py | 9 +++++++++ 5 files changed, 18 insertions(+), 9 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 8ece035..7a38392 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -24,7 +24,7 @@ def genomic_to_coordinate(self, pos_m: dict[str, int]) -> int: class NonCoding(Genomic): """NonCoding crossmap object.""" - def __init__(self, locations: list[tuple[int, int]], 
inverted: bool=False) -> None: + def __init__(self, locations: list[tuple[int, int]], inverted: bool = False) -> None: """ :arg list locations: List of locus locations. :arg bool inverted: Orientation. @@ -33,7 +33,7 @@ def __init__(self, locations: list[tuple[int, int]], inverted: bool=False) -> No self._noncoding = MultiLocus(locations, inverted) - def coordinate_to_noncoding(self, coordinate: int, degenerate: bool=False) -> dict: + def coordinate_to_noncoding(self, coordinate: int, degenerate: bool = False) -> dict: """Convert a coordinate to a noncoding position (n./r.). :arg int coordinate: Coordinate. @@ -72,9 +72,9 @@ class Coding(NonCoding): """Coding crossmap object.""" def __init__( self, - locations: list[tuple[int,int]], - cds: tuple[int,int], - inverted : bool=False + locations: list[tuple[int, int]], + cds: tuple[int, int], + inverted: bool = False ) -> None: """ :arg list locations: List of locus locations. @@ -127,7 +127,7 @@ def _coordinate_to_coding(self, coordinate: int) -> dict[str, int | str]: 'region': '' } - def coordinate_to_coding(self, coordinate: int, degenerate: bool=False) -> dict: + def coordinate_to_coding(self, coordinate: int, degenerate: bool = False) -> dict: """Convert a coordinate to a coding position (c./r.). :arg int coordinate: Coordinate. diff --git a/mutalyzer_crossmapper/location.py b/mutalyzer_crossmapper/location.py index 1bf7a06..9b534e4 100644 --- a/mutalyzer_crossmapper/location.py +++ b/mutalyzer_crossmapper/location.py @@ -19,7 +19,7 @@ def _nearest_boundary(lb: int, rb: int, c: int, p: int) -> int: return p -def nearest_location(ls: list[tuple[int,int]], c: int, p: int = 0) -> int: +def nearest_location(ls: list[tuple[int, int]], c: int, p: int = 0) -> int: """Find the location nearest to `c`. In case of a draw, the parameter `p` decides which index is chosen. 
diff --git a/mutalyzer_crossmapper/locus.py b/mutalyzer_crossmapper/locus.py index d62144d..7fbb7b3 100644 --- a/mutalyzer_crossmapper/locus.py +++ b/mutalyzer_crossmapper/locus.py @@ -1,6 +1,6 @@ class Locus(object): """Locus object.""" - def __init__(self, location: tuple[int, int], inverted: bool=False) -> None: + def __init__(self, location: tuple[int, int], inverted: bool = False) -> None: """ :arg tuple location: Locus location. :arg bool inverted: Orientation. diff --git a/mutalyzer_crossmapper/multi_locus.py b/mutalyzer_crossmapper/multi_locus.py index 627a31b..d2c5662 100644 --- a/mutalyzer_crossmapper/multi_locus.py +++ b/mutalyzer_crossmapper/multi_locus.py @@ -19,7 +19,7 @@ def _offsets(locations: list[tuple[int, int]], orientation: int) -> list[int]: class MultiLocus(object): """MultiLocus object.""" - def __init__(self, locations: list[tuple[int, int]], inverted: bool=False) -> None: + def __init__(self, locations: list[tuple[int, int]], inverted: bool = False) -> None: """ :arg list locations: List of locus locations. :arg bool inverted: Orientation. diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index 33aec12..f69ee05 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -112,6 +112,15 @@ def test_NonCoding_degenerate(): ], ) + # # Boundary between exon and intron + # degenerate_equal( + # crossmap.noncoding_to_coordinate, + # 29, + # [ + + # ] + # ) + # Boundary between downstream and transcript. 
degenerate_equal( crossmap.noncoding_to_coordinate, From 12e8e1f67c47d9f87c9f3c8b0ab17c7208a457c9 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Mon, 30 Mar 2026 15:40:33 +0200 Subject: [PATCH 119/127] Fix whitespaces --- tests/test_crossmapper.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index f69ee05..0ed4c47 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -112,15 +112,6 @@ def test_NonCoding_degenerate(): ], ) - # # Boundary between exon and intron - # degenerate_equal( - # crossmap.noncoding_to_coordinate, - # 29, - # [ - - # ] - # ) - # Boundary between downstream and transcript. degenerate_equal( crossmap.noncoding_to_coordinate, @@ -375,7 +366,7 @@ def test_Coding_no_utr5_inverted(): crossmap.coordinate_to_coding, 19, crossmap.coding_to_coordinate, - {'position': 1 , 'offset': 0, 'region': ''}, + {'position': 1, 'offset': 0, 'region': ''}, ) @@ -521,6 +512,7 @@ def test_Coding_degenerate(): """Degenerate upstream and downstream positions are silently corrected.""" crossmap = Coding([(10, 20)], (11, 19)) + # Degenerate position in upstream. 
degenerate_equal( crossmap.coding_to_coordinate, 9, From e2f2a43184023a36b38f6758bb1837fdb36b970a Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Mon, 30 Mar 2026 16:18:24 +0200 Subject: [PATCH 120/127] Add tests for protein positions from the reverse strand --- tests/test_crossmapper.py | 97 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index 0ed4c47..87b5ea7 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -351,6 +351,28 @@ def test_Coding_no_utr5(): ) +def test_Coding_no_intron(): + crossmap = Coding([(10, 20), (20, 30)], (15, 25)) + + invariant( + crossmap.coordinate_to_coding, + 20, + crossmap.coding_to_coordinate, + {'position': 6, 'offset': 0, 'region': ''}, + ) + + +def test_Coding_no_intron_inverted(): + crossmap = Coding([(10, 20), (20, 30)], (15, 25), True) + + invariant( + crossmap.coordinate_to_coding, + 20, + crossmap.coding_to_coordinate, + {'position': 5, 'offset': 0, 'region': ''}, + ) + + def test_Coding_no_utr5_inverted(): """A 5' UTR may be missing.""" crossmap = Coding([(10, 20)], (15, 20), True) @@ -788,3 +810,78 @@ def test_Coding_protein(): crossmap.protein_to_coordinate, {'position': 1, 'position_in_codon': 1, 'offset': 0, 'region': 'd'} ) + + +def test_Coding_inverted_protein(): + """Protein positions.""" + crossmap = Coding(_exons, _cds, True) + + # Boundary between upstream and 5' UTR + invariant( + crossmap.coordinate_to_protein, + 4, + crossmap.protein_to_coordinate, + {'position': 1, 'position_in_codon': 1, 'offset': 0, 'region': 'd'} + ) + invariant( + crossmap.coordinate_to_protein, + 5, + crossmap.protein_to_coordinate, + {'position': 4, 'position_in_codon': 2, 'offset': 0, 'region': '*'} + ) + + # Boundary between 5' UTR and CDS + invariant( + crossmap.coordinate_to_protein, + 31, + crossmap.protein_to_coordinate, + {'position': 1, 'position_in_codon': 1, 'offset': 0, 'region': '*'}, + ) + invariant( + 
crossmap.coordinate_to_protein, + 32, + crossmap.protein_to_coordinate, + {'position': 2, 'position_in_codon': 3, 'offset': 0, 'region': ''}, + ) + + # Intron boundary. + invariant( + crossmap.coordinate_to_protein, + 34, + crossmap.protein_to_coordinate, + {'position': 2, 'position_in_codon': 1, 'offset': 0, 'region': ''}, + ) + invariant( + crossmap.coordinate_to_protein, + 35, + crossmap.protein_to_coordinate, + {'position': 2, 'position_in_codon': 1, 'offset': -1, 'region': ''}, + ) + + # Boundary between CDS and 3' UTR. + invariant( + crossmap.coordinate_to_protein, + 42, + crossmap.protein_to_coordinate, + {'position': 1, 'position_in_codon': 1, 'offset': 0, 'region': ''}, + ) + invariant( + crossmap.coordinate_to_protein, + 43, + crossmap.protein_to_coordinate, + {'position': 1, 'position_in_codon': 3, 'offset': 0, 'region': '-'}, + ) + + # Boundary between 3' UTR and downstream + invariant( + crossmap.coordinate_to_protein, + 71, + crossmap.protein_to_coordinate, + {'position': 2, 'position_in_codon': 2, 'offset': 0, 'region': '-'} + ) + invariant( + crossmap.coordinate_to_protein, + 72, + crossmap.protein_to_coordinate, + {'position': 1, 'position_in_codon': 3, 'offset': 0, 'region': 'u'} + ) From 50d50b57798c90396f5792c5ccf5692914d3551d Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Tue, 31 Mar 2026 13:37:16 +0200 Subject: [PATCH 121/127] Rename external (input/output) variable to pos_m and add test for a one base intron in coding --- mutalyzer_crossmapper/crossmapper.py | 66 ++++++++++++++++------------ tests/test_crossmapper.py | 22 ++++++++++ 2 files changed, 59 insertions(+), 29 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 7a38392..d10ec14 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -33,7 +33,11 @@ def __init__(self, locations: list[tuple[int, int]], inverted: bool = False) -> self._noncoding = MultiLocus(locations, inverted) - def 
coordinate_to_noncoding(self, coordinate: int, degenerate: bool = False) -> dict: + def coordinate_to_noncoding( + self, + coordinate: int, + degenerate: bool = False + ) -> dict[str, int | str]: """Convert a coordinate to a noncoding position (n./r.). :arg int coordinate: Coordinate. @@ -41,17 +45,14 @@ def coordinate_to_noncoding(self, coordinate: int, degenerate: bool = False) -> :returns dict: Noncoding position model. """ multilocus_pos_m = self._noncoding.to_position(coordinate) - noncoding_pos_m = {**multilocus_pos_m, 'position': multilocus_pos_m['position'] + 1} - region = noncoding_pos_m['region'] - if region == '': - return noncoding_pos_m - + pos_m = {**multilocus_pos_m, 'position': multilocus_pos_m['position'] + 1} + region = pos_m['region'] if degenerate: if region == 'u': - noncoding_pos_m['region'] = '-' + pos_m['region'] = '-' elif region == 'd': - noncoding_pos_m['region'] = '*' - return noncoding_pos_m + pos_m['region'] = '*' + return pos_m def noncoding_to_coordinate(self, pos_m: dict[str, int | str]) -> int: """Convert a noncoding position (n./r.) to a coordinate. @@ -61,9 +62,9 @@ def noncoding_to_coordinate(self, pos_m: dict[str, int | str]) -> int: :returns int: Coordinate. 
""" multilocus_pos_m = {**pos_m, 'position': pos_m['position'] - 1} - if multilocus_pos_m['region'] == '-': + if pos_m['region'] == '-': multilocus_pos_m['region'] = 'u' - elif multilocus_pos_m['region'] == '*': + elif pos_m['region'] == '*': multilocus_pos_m['region'] = 'd' return self._noncoding.to_coordinate(multilocus_pos_m) @@ -83,17 +84,25 @@ def __init__( """ NonCoding.__init__(self, locations, inverted) - b0 = self._noncoding.to_position(cds[0]) - b1 = self._noncoding.to_position(cds[1] - 1) - e0 = self._noncoding.to_position(locations[0][0]) - e1 = self._noncoding.to_position(locations[-1][1] - 1) + cds_start = self._noncoding.to_position(cds[0]) + cds_end = self._noncoding.to_position(cds[1] - 1) + exons_start = self._noncoding.to_position(locations[0][0]) + exons_end = self._noncoding.to_position(locations[-1][1] - 1) if self._inverted: - self._coding = (b1['position'] + b1['offset'], b0['position'] + b0['offset'] + 1) - self._exons = (e1['position'], e0['position'] + 1) + self._coding = ( + cds_end['position'] + cds_end['offset'], + cds_start['position'] + cds_start['offset'] + 1 + ) + # Used in degenerate option + self._exons_len = exons_start['position'] + 1 else: - self._coding = (b0['position'] + b0['offset'], b1['position'] + b1['offset'] + 1) - self._exons = (e0['position'], e1['position'] + 1) + self._coding = ( + cds_start['position'] + cds_start['offset'], + cds_end['position'] + cds_end['offset'] + 1 + ) + # Used in degenerate option + self._exons_len = exons_end['position'] + 1 def _coordinate_to_coding(self, coordinate: int) -> dict[str, int | str]: """Convert a coordinate to a coding position (c./r.). 
@@ -145,7 +154,7 @@ def coordinate_to_coding(self, coordinate: int, degenerate: bool = False) -> dic location = pos_m['position'] if region == 'u': if self._inverted: - degenerate_pos_m['position'] = location + self._exons[1] - self._coding[1] + degenerate_pos_m['position'] = location + self._exons_len - self._coding[1] else: degenerate_pos_m['position'] = location + self._coding[0] degenerate_pos_m['region'] = '-' @@ -153,7 +162,7 @@ def coordinate_to_coding(self, coordinate: int, degenerate: bool = False) -> dic if self._inverted: degenerate_pos_m['position'] = location + self._coding[0] else: - degenerate_pos_m['position'] = location + self._exons[1] - self._coding[1] + degenerate_pos_m['position'] = location + self._exons_len - self._coding[1] degenerate_pos_m['region'] = '*' return degenerate_pos_m @@ -179,7 +188,6 @@ def _coding_to_coordinate(self, pos_m: dict[str, int | str]) -> int: multilocus_pos_m['position'] = self._coding[0] - location else: multilocus_pos_m['position'] = self._coding[1] + location - 1 - return self._noncoding.to_coordinate(multilocus_pos_m) def coding_to_coordinate(self, pos_m: dict[str, int | str]) -> int: @@ -199,20 +207,20 @@ def coordinate_to_protein(self, coordinate: int) -> dict[str, int | str]: :returns dict: Protein position model(p.). 
""" - pos = self.coordinate_to_coding(coordinate) + pos_m = self.coordinate_to_coding(coordinate) - location = pos['position'] - if pos['region'] in ('-', 'u'): + location = pos_m['position'] + if pos_m['region'] in ('-', 'u'): return { 'position': abs(-location // 3), 'position_in_codon': -location % 3 + 1, - 'region': pos['region'], - 'offset': pos['offset']} + 'region': pos_m['region'], + 'offset': pos_m['offset']} return { 'position': (location + 2) // 3, 'position_in_codon': (location + 2) % 3 + 1, - 'region': pos['region'], - 'offset': pos['offset']} + 'region': pos_m['region'], + 'offset': pos_m['offset']} def protein_to_coordinate(self, pos_m: dict[str, int | str]) -> int: """Convert a protein position (p.) to a coordinate. diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index 87b5ea7..098508b 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -373,6 +373,28 @@ def test_Coding_no_intron_inverted(): ) +def test_Coding_one_base_intron(): + crossmap = Coding([(10, 19), (20, 30)], (15, 25)) + + invariant( + crossmap.coordinate_to_coding, + 19, + crossmap.coding_to_coordinate, + {'position': 4, 'offset': 1, 'region': ''}, + ) + + +def test_Coding_one_base_intron_inverted(): + crossmap = Coding([(10, 19), (20, 30)], (15, 25), True) + + invariant( + crossmap.coordinate_to_coding, + 19, + crossmap.coding_to_coordinate, + {'position': 5, 'offset': 1, 'region': ''}, + ) + + def test_Coding_no_utr5_inverted(): """A 5' UTR may be missing.""" crossmap = Coding([(10, 20)], (15, 20), True) From 73e2965be5899da0e1f02d0e18df49e42e9645cc Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Tue, 31 Mar 2026 14:29:57 +0200 Subject: [PATCH 122/127] Fix typo and font --- docs/library.rst | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/library.rst b/docs/library.rst index 6b358f9..7720088 100644 --- a/docs/library.rst +++ b/docs/library.rst @@ -22,7 +22,7 @@ They are represented as 1-key 
dictionaries. Below is an example of ``g.1`` in HG Where: -- **position**: an integer representing a nucleotide position (>0) +- **position**: an integer representing a nucleotide position (> 0) Genomic Position Conversion ~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -68,7 +68,7 @@ as 3-key dictionaries. Below is an example of ``n.14+1`` in HGVS. Where: -- **position**: an integer representing a transcript position (>0) +- **position**: an integer representing a nucleotide position (> 0) - **offset**: an integer indicating the offset relative to the position (negative for upstream, positive for downstream) - **region**: a string describing the region type (empty for positions within a non-coding @@ -86,7 +86,7 @@ NonCoding Position Conversion Now the functions ``coordinate_to_noncoding()`` and ``noncoding_to_coordinate()`` can be used. -In our example, the HGVS position ``g.36`` (coordinate `35`) is equivalent to +In our example, the HGVS position ``g.36`` (coordinate *35*) is equivalent to position ``n.14+1``. We can convert between these two as follows. .. code:: python @@ -96,8 +96,8 @@ position ``n.14+1``. We can convert between these two as follows. >>> crossmap.noncoding_to_coordinate({'position': 14, 'offset': 1, 'region': ''}) 35 -When the coordinate is upstream or downstream of the transcript, we use ``u`` to -present upstream and ``d`` to present downstream. +When the coordinate is upstream or downstream of the transcript, we use ``u`` to +denote upstream and ``d`` to denote downstream. .. code:: python @@ -124,7 +124,7 @@ instead. For transcripts that reside on the reverse complement strand, the ``inverted`` parameter should be set to ``True``. In our example, HGVS position ``g.36`` -(coordinate `35`) is now equivalent to position ``n.9-1``. +(coordinate *35*) is now equivalent to position ``n.9-1``. .. code:: python @@ -210,7 +210,7 @@ represented as 3-key dictionaries. Here is an example of ``c.*1+3``. 
Where: -- **position**: an integer representing a transcript position (>0) +- **position**: an integer representing a transcript position (> 0) - **offset**: an integer indicating the offset relative to the position (negative for upstream, positive for downstream) - **region**: a string describing the region type (empty for positions within coding DNA sequence, @@ -230,7 +230,7 @@ On top of the functionality provided by the ``NonCoding`` class, the functions ``coordinate_to_coding()`` and ``coding_to_coordinate()`` can be used. These functions use a 3-key dictionary to represent a coding position. -In our example, the HGVS position ``g.32`` (coordinate `31`) is equivalent to +In our example, the HGVS position ``g.32`` (coordinate *31*) is equivalent to position ``c.-1``. We can convert between these two as follows. .. code:: python @@ -335,7 +335,7 @@ Protein Additionally, the functions ``coordinate_to_protein()`` and ``protein_to_coordinate()`` can be used. These functions use a 4-key dictionary -to represent a protein position. Here is one example of three posibilities +to represent a protein position. Here is one example of three possibilities for ``p.1`` in HGVS. .. code-block:: python @@ -349,13 +349,13 @@ for ``p.1`` in HGVS. Where: -- **position**: an integer representing an amino acid position (>0) -- **position_in_codon**: an integer indicating the nucleotide index within the codon (1, 2, or 3) +- **position**: an integer representing an amino acid position (> 0) +- **position_in_codon**: an integer indexing the position in a codon (1, 2, or 3) - **offset**: an integer indicating offset relative to the nucleotide specified by `position_in_codon` in the codon -- **region**: a string describing the region type (empty for vaid amino acid positions) +- **region**: a string describing the region type (empty for valid amino acid positions) -In our example the HGVS position ``g.42`` (coordinate `41`) corresponds with -position ``p.2``. 
We can convert between these to as follows. +In our example, the HGVS position ``g.42`` (coordinate *41*) corresponds with +position ``p.2``. We can convert between these two as follows. .. code:: python From 3ec8ab476a021a2a8369067c4b8791b5ba628f00 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Wed, 1 Apr 2026 11:37:08 +0200 Subject: [PATCH 123/127] Update degenerate for noncoding and tests --- mutalyzer_crossmapper/crossmapper.py | 16 ++++++++++++++-- tests/test_crossmapper.py | 21 ++++++++++----------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index d10ec14..2e948bd 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -32,6 +32,10 @@ def __init__(self, locations: list[tuple[int, int]], inverted: bool = False) -> self._inverted = inverted self._noncoding = MultiLocus(locations, inverted) + if self._inverted: + self._exons_len = self._noncoding.to_position(locations[0][0])['position'] + 1 + else: + self._exons_len = self._noncoding.to_position(locations[-1][1] - 1)['position'] + 1 def coordinate_to_noncoding( self, @@ -50,8 +54,12 @@ def coordinate_to_noncoding( if degenerate: if region == 'u': pos_m['region'] = '-' + pos_m['offset'] = -pos_m['position'] + pos_m['position'] = 1 elif region == 'd': pos_m['region'] = '*' + pos_m['offset'] = pos_m['position'] + pos_m['position'] = self._exons_len return pos_m def noncoding_to_coordinate(self, pos_m: dict[str, int | str]) -> int: @@ -62,10 +70,14 @@ def noncoding_to_coordinate(self, pos_m: dict[str, int | str]) -> int: :returns int: Coordinate. 
""" multilocus_pos_m = {**pos_m, 'position': pos_m['position'] - 1} - if pos_m['region'] == '-': + if pos_m['region'] == '-': # degenerate results multilocus_pos_m['region'] = 'u' - elif pos_m['region'] == '*': + multilocus_pos_m['position'] = abs(pos_m['offset']) -1 + multilocus_pos_m['offset'] = 0 + if pos_m['region'] == '*': # degenerate results multilocus_pos_m['region'] = 'd' + multilocus_pos_m['position'] = abs(pos_m['offset']) -1 + multilocus_pos_m['offset'] = 0 return self._noncoding.to_coordinate(multilocus_pos_m) diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index 098508b..bc33282 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -107,8 +107,7 @@ def test_NonCoding_degenerate(): [ {'position': 1, 'offset': -1, 'region': ''}, {'position': 1, 'offset': 0, 'region': 'u'}, - {'position': 1, 'offset': 0, 'region': '-'}, - {'position': 2, 'offset': 1, 'region': '-'}, + {'position': 1, 'offset': -1, 'region': '-'}, ], ) @@ -121,7 +120,7 @@ def test_NonCoding_degenerate(): {'position': 22, 'offset': 1, 'region': ''}, {'position': 23, 'offset': 0, 'region': ''}, {'position': 24, 'offset': -1, 'region': ''}, - {'position': 1, 'offset': 0, 'region': '*'}, + {'position': 22, 'offset': 1, 'region': '*'}, # standard degenerate result ], ) @@ -137,7 +136,7 @@ def test_NonCoding_inverted_degenerate(): [ {'position': 1, 'offset': -1, 'region': ''}, {'position': 1, 'offset': 0, 'region': 'u'}, - {'position': 1, 'offset': 0, 'region': '-'}, + {'position': 1, 'offset': -1, 'region': '-'}, ], ) @@ -147,7 +146,7 @@ def test_NonCoding_inverted_degenerate(): 4, [ {'position': 1, 'offset': 0, 'region': 'd'}, - {'position': 1, 'offset': 0, 'region': '*'}, + {'position': 22, 'offset': 1, 'region': '*'}, {'position': 23, 'offset': 0, 'region': ''}, {'position': 22, 'offset': 1, 'region': ''}, ], @@ -159,13 +158,13 @@ def test_NonCoding_degenerate_return(): assert crossmap.coordinate_to_noncoding(4, True) == { 'position': 1, - 'offset': 0, 
+ 'offset': -1, 'region': '-', } assert crossmap.coordinate_to_noncoding(72, True) == { - 'position': 1, - 'offset': 0, + 'position': 22, + 'offset': 1, 'region': '*', } @@ -175,13 +174,13 @@ def test_NonCoding_inverted_degenerate_return(): assert crossmap.coordinate_to_noncoding(72, True) == { 'position': 1, - 'offset': 0, + 'offset': -1, 'region': '-', } assert crossmap.coordinate_to_noncoding(4, True) == { - 'position': 1, - 'offset': 0, + 'position': 22, + 'offset': 1, 'region': '*', } From ba091d536a4f323db490174d4e6421aee75795b1 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Wed, 1 Apr 2026 12:04:53 +0200 Subject: [PATCH 124/127] Update degenerate for noncoding example in documentation --- docs/library.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/library.rst b/docs/library.rst index 7720088..64aa788 100644 --- a/docs/library.rst +++ b/docs/library.rst @@ -113,14 +113,14 @@ denote upstream and ``d`` to denote downstream. The ``coordinate_to_noncoding()`` function accepts an optional ``degenerate`` argument. When set to ``True``, positions outside of the transcript are no longer described using the ``u`` or ``d`` notation, ``-`` and ``*`` are used -instead. +instead. The values in ``position`` and ``offset`` will change accordingly. .. code:: python >>> crossmap.coordinate_to_noncoding(2) {'position': 3, 'offset': 0, 'region': 'u'} >>> crossmap.coordinate_to_noncoding(2, True) - {'position': 3, 'offset': 0, 'region': '-'} + {'position': 1, 'offset': -3, 'region': '-'} For transcripts that reside on the reverse complement strand, the ``inverted`` parameter should be set to ``True``. 
In our example, HGVS position ``g.36`` From 39d1d6b5fe5a09e112f61b310da6cb6a751327b4 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Wed, 1 Apr 2026 15:02:02 +0200 Subject: [PATCH 125/127] Discard unnecessary functions for coding --- mutalyzer_crossmapper/crossmapper.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 2e948bd..117a62e 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -178,7 +178,7 @@ def coordinate_to_coding(self, coordinate: int, degenerate: bool = False) -> dic degenerate_pos_m['region'] = '*' return degenerate_pos_m - def _coding_to_coordinate(self, pos_m: dict[str, int | str]) -> int: + def coding_to_coordinate(self, pos_m: dict[str, int | str]) -> int: """Convert a coding position (c./r.) to a coordinate. :arg dict pos_m: Coding position model (c./r.). @@ -202,16 +202,6 @@ def _coding_to_coordinate(self, pos_m: dict[str, int | str]) -> int: multilocus_pos_m['position'] = self._coding[1] + location - 1 return self._noncoding.to_coordinate(multilocus_pos_m) - def coding_to_coordinate(self, pos_m: dict[str, int | str]) -> int: - """Convert a coding position (c./r.) to a coordinate. - - :arg dict pos_m: Coding position model (c./r.). - - :returns int: Coordinate. - """ - - return self._coding_to_coordinate(pos_m) - def coordinate_to_protein(self, coordinate: int) -> dict[str, int | str]: """Convert a coordinate to a protein position (p.). 
From 915e8453a90bdf470b60d2c11dee11462598e84c Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Wed, 1 Apr 2026 15:15:43 +0200 Subject: [PATCH 126/127] Cleanup --- mutalyzer_crossmapper/crossmapper.py | 8 ++++---- tests/test_crossmapper.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 117a62e..0687b2b 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -70,13 +70,13 @@ def noncoding_to_coordinate(self, pos_m: dict[str, int | str]) -> int: :returns int: Coordinate. """ multilocus_pos_m = {**pos_m, 'position': pos_m['position'] - 1} - if pos_m['region'] == '-': # degenerate results + if pos_m['region'] == '-': # degenerate results multilocus_pos_m['region'] = 'u' - multilocus_pos_m['position'] = abs(pos_m['offset']) -1 + multilocus_pos_m['position'] = abs(pos_m['offset']) - 1 multilocus_pos_m['offset'] = 0 - if pos_m['region'] == '*': # degenerate results + if pos_m['region'] == '*': # degenerate results multilocus_pos_m['region'] = 'd' - multilocus_pos_m['position'] = abs(pos_m['offset']) -1 + multilocus_pos_m['position'] = abs(pos_m['offset']) - 1 multilocus_pos_m['offset'] = 0 return self._noncoding.to_coordinate(multilocus_pos_m) diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index bc33282..a186937 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -120,7 +120,7 @@ def test_NonCoding_degenerate(): {'position': 22, 'offset': 1, 'region': ''}, {'position': 23, 'offset': 0, 'region': ''}, {'position': 24, 'offset': -1, 'region': ''}, - {'position': 22, 'offset': 1, 'region': '*'}, # standard degenerate result + {'position': 22, 'offset': 1, 'region': '*'}, # standard degenerate result ], ) From 69eb98589a7851f6f2dbec3a480eca92ad25e895 Mon Sep 17 00:00:00 2001 From: Xiaoyun Liu Date: Wed, 1 Apr 2026 16:04:40 +0200 Subject: [PATCH 127/127] Fix error for inverted coding --- 
mutalyzer_crossmapper/crossmapper.py | 10 ++-------- tests/test_crossmapper.py | 30 ++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/mutalyzer_crossmapper/crossmapper.py b/mutalyzer_crossmapper/crossmapper.py index 0687b2b..88cba2e 100644 --- a/mutalyzer_crossmapper/crossmapper.py +++ b/mutalyzer_crossmapper/crossmapper.py @@ -165,16 +165,10 @@ def coordinate_to_coding(self, coordinate: int, degenerate: bool = False) -> dic degenerate_pos_m = {**pos_m, 'offset': pos_m['offset']} location = pos_m['position'] if region == 'u': - if self._inverted: - degenerate_pos_m['position'] = location + self._exons_len - self._coding[1] - else: - degenerate_pos_m['position'] = location + self._coding[0] + degenerate_pos_m['position'] = location + self._coding[0] degenerate_pos_m['region'] = '-' if region == 'd': - if self._inverted: - degenerate_pos_m['position'] = location + self._coding[0] - else: - degenerate_pos_m['position'] = location + self._exons_len - self._coding[1] + degenerate_pos_m['position'] = location + self._exons_len - self._coding[1] degenerate_pos_m['region'] = '*' return degenerate_pos_m diff --git a/tests/test_crossmapper.py b/tests/test_crossmapper.py index a186937..dcdb718 100644 --- a/tests/test_crossmapper.py +++ b/tests/test_crossmapper.py @@ -639,6 +639,11 @@ def test_Coding_inverted_degenerate_return(): 'offset': 0, 'region': '-', } + assert crossmap.coordinate_to_coding(25, True) == { + 'position': 7, + 'offset': 0, + 'region': '-', + } assert crossmap.coordinate_to_coding(9, True) == { 'position': 2, 'offset': 0, @@ -646,6 +651,31 @@ def test_Coding_inverted_degenerate_return(): } +def test_Coding_two_exons_inverted_degenerate_return(): + """Degenerate upstream and downstream positions may be returned.""" + crossmap = Coding([(10, 20), (30, 40)], (18, 37), True) + + assert crossmap.coordinate_to_coding(5, True) == { + 'position': 13, + 'offset': 0, + 'region': '*', + } + assert 
crossmap.coordinate_to_coding(25, True) == { + 'position': 7, + 'offset': 5, + 'region': '', + } + assert crossmap.coordinate_to_coding(35, True) == { + 'position': 2, + 'offset': 0, + 'region': '', + } + assert crossmap.coordinate_to_coding(38, True) == { + 'position': 2, + 'offset': 0, + 'region': '-', + } + def test_Coding_degenerate_no_return(): """Degenerate internal positions do not exist.""" crossmap = Coding([(10, 20), (30, 40)], (10, 40))