Skip to content

Commit c4eb253

Browse files
KingKong1213dhaupt88simbig
authored
feat: use NucleotidePosition value object, add 0-based coordinate conversion (#85)
* Feat: Use NucleotidePosition instead of a plain integer, so the integer is already validated * Apply php-cs-fixer changes * fix: enforce 1-based coordinates, add 0-based half-open conversion NucleotidePosition must reject position 0 to stay consistent with the 1-based closed coordinate system used by length(), containsPosition(), intersection() and HGVS g. notation. Add GenomicRegion::fromZeroBasedHalfOpen() and toZeroBasedHalfOpen() for BED/BAM/bigWig interoperability: convert at the I/O boundary, work internally in 1-based closed coordinates. 🤖 Generated with Claude Code * Implement 1-based and 0-based logic in NucleotidePosition.php * Add function genomicPositions to GenomicRegion.php * Rename function parse to parseOneBased * Apply php-cs-fixer changes --------- Co-authored-by: Dennis Haupt <dennis.haupt@mll.com> Co-authored-by: KingKong1213 <168984406+KingKong1213@users.noreply.github.com> Co-authored-by: Simon Bigelmayr <simon.bigelmayr@mll.com>
1 parent d2c6803 commit c4eb253

6 files changed

Lines changed: 229 additions & 93 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@
44
/vendor
55
/composer.lock
66
/.phpunit.result.cache
7+
/.claude/settings.local.json

src/GenomicPosition.php

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,24 +10,20 @@ class GenomicPosition
1010

1111
public int $position;
1212

13-
public function __construct(Chromosome $chromosome, int $position)
13+
public function __construct(Chromosome $chromosome, NucleotidePosition $position)
1414
{
15-
if ($position < 1) {
16-
throw new \InvalidArgumentException("Position must be positive, got: {$position}.");
17-
}
18-
1915
$this->chromosome = $chromosome;
20-
$this->position = $position;
16+
$this->position = $position->value;
2117
}
2218

23-
/** @example GenomicPosition::parse('chr1:123456') */
24-
public static function parse(string $value): self
19+
/** @example GenomicPosition::parseOneBased('chr1:123456') */
20+
public static function parseOneBased(string $value): self
2521
{
2622
if (preg_match('/^([^:]+):(g\.|)(\d+)$/', $value, $matches) === 0) {
2723
throw new \InvalidArgumentException("Invalid genomic position format: {$value}. Expected format: chr1:123456.");
2824
}
2925

30-
return new self(new Chromosome($matches[1]), (int) $matches[3]);
26+
return new self(new Chromosome($matches[1]), NucleotidePosition::fromOneBased((int) $matches[3]));
3127
}
3228

3329
public function equals(self $other): bool

src/GenomicRegion.php

Lines changed: 38 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -14,36 +14,28 @@ class GenomicRegion
1414

1515
public function __construct(
1616
Chromosome $chromosome,
17-
int $start,
18-
int $end
17+
NucleotidePosition $start,
18+
NucleotidePosition $end
1919
) {
20-
if ($start < 1) {
21-
throw new \InvalidArgumentException("Start must be positive, got: {$start}.");
22-
}
23-
24-
if ($end < 1) {
25-
throw new \InvalidArgumentException("End must be positive, got: {$end}.");
26-
}
27-
28-
if ($start > $end) {
29-
throw new \InvalidArgumentException("End ({$end}) must not be less than start ({$start}).");
20+
if ($start->value > $end->value) {
21+
throw new \InvalidArgumentException("End ({$end->value}) must not be less than start ({$start->value}).");
3022
}
3123

3224
$this->chromosome = $chromosome;
33-
$this->start = $start;
34-
$this->end = $end;
25+
$this->start = $start->value;
26+
$this->end = $end->value;
3527
}
3628

37-
public static function parse(string $value): self
29+
public static function parseOneBased(string $value): self
3830
{
3931
if (preg_match('/^([^:]+):(g\.|)(\d+)(-(\d+)|)$/', $value, $matches) === 0) {
4032
throw new \InvalidArgumentException("Invalid genomic region format: {$value}. Expected format: chr1:123-456.");
4133
}
4234

4335
return new self(
4436
new Chromosome($matches[1]),
45-
(int) $matches[3],
46-
(int) ($matches[5] ?? $matches[3])
37+
NucleotidePosition::fromOneBased((int) $matches[3]),
38+
NucleotidePosition::fromOneBased((int) ($matches[5] ?? $matches[3]))
4739
);
4840
}
4941

@@ -98,13 +90,40 @@ public function intersection(self $other): ?self
9890

9991
return new self(
10092
$this->chromosome,
101-
max($this->start, $other->start),
102-
min($this->end, $other->end)
93+
NucleotidePosition::fromOneBased(max($this->start, $other->start)),
94+
NucleotidePosition::fromOneBased(min($this->end, $other->end))
95+
);
96+
}
97+
98+
/** Constructs a 1-based closed region from 0-based half-open coordinates (BED, BAM, bigWig). */
99+
public static function fromZeroBasedHalfOpen(string $chromosome, int $start, int $end): self
100+
{
101+
return new self(
102+
new Chromosome($chromosome),
103+
NucleotidePosition::fromZeroBased($start),
104+
NucleotidePosition::fromOneBased($end)
103105
);
104106
}
105107

108+
/** @return array{Chromosome, int, int} Chromosome, 0-based start, half-open end. */
109+
public function toZeroBasedHalfOpen(): array
110+
{
111+
return [$this->chromosome, $this->start - 1, $this->end];
112+
}
113+
106114
private function containsCoordinate(int $position): bool
107115
{
108116
return $position >= $this->start && $position <= $this->end;
109117
}
118+
119+
/** @return array<int, GenomicPosition> */
120+
public function genomicPositions(): array
121+
{
122+
$items = [];
123+
for ($genomicPosition = $this->start; $genomicPosition <= $this->end; ++$genomicPosition) {
124+
$items[] = new GenomicPosition($this->chromosome, NucleotidePosition::fromOneBased($genomicPosition));
125+
}
126+
127+
return $items;
128+
}
110129
}

src/NucleotidePosition.php

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
<?php declare(strict_types=1);
2+
3+
namespace MLL\Utils;
4+
5+
class NucleotidePosition
6+
{
7+
public int $value;
8+
9+
private function __construct(int $oneBasedPosition)
10+
{
11+
$this->value = $oneBasedPosition;
12+
}
13+
14+
public static function fromOneBased(int $position): self
15+
{
16+
if ($position < 1) {
17+
throw new \InvalidArgumentException("Position must be positive, got: {$position}.");
18+
}
19+
20+
$instance = new self($position);
21+
22+
return $instance;
23+
}
24+
25+
public static function fromZeroBased(int $position): self
26+
{
27+
if ($position < 0) {
28+
throw new \InvalidArgumentException("Position must not be negative, got: {$position}.");
29+
}
30+
31+
$instance = new self($position + 1);
32+
33+
return $instance;
34+
}
35+
}

tests/GenomicPositionTest.php

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,63 +1,71 @@
11
<?php declare(strict_types=1);
22

3-
use MLL\Utils\Chromosome;
43
use MLL\Utils\GenomicPosition;
54
use MLL\Utils\NamingConvention;
5+
use MLL\Utils\NucleotidePosition;
66
use PHPUnit\Framework\Attributes\DataProvider;
77
use PHPUnit\Framework\TestCase;
88

99
final class GenomicPositionTest extends TestCase
1010
{
1111
public function testParseUCSC(): void
1212
{
13-
$position = GenomicPosition::parse('chr11:1');
13+
$position = GenomicPosition::parseOneBased('chr11:1');
1414
self::assertSame('chr11:1', $position->toString(new NamingConvention(NamingConvention::UCSC)));
1515
}
1616

1717
public function testParseEnsembl(): void
1818
{
19-
$position = GenomicPosition::parse('11:1');
19+
$position = GenomicPosition::parseOneBased('11:1');
2020
self::assertSame('11:1', $position->toString(new NamingConvention(NamingConvention::ENSEMBL)));
2121
}
2222

2323
public function testParseHGVSg(): void
2424
{
25-
$position = GenomicPosition::parse('chr11:g.1');
25+
$position = GenomicPosition::parseOneBased('chr11:g.1');
2626
self::assertSame('chr11:1', $position->toString(new NamingConvention(NamingConvention::UCSC)));
2727
}
2828

2929
public function testOutputInBothConventions(): void
3030
{
31-
$position = GenomicPosition::parse('chr11:12345');
31+
$position = GenomicPosition::parseOneBased('chr11:12345');
3232
self::assertSame('chr11:12345', $position->toString(new NamingConvention(NamingConvention::UCSC)));
3333
self::assertSame('11:12345', $position->toString(new NamingConvention(NamingConvention::ENSEMBL)));
3434
}
3535

3636
public function testEquals(): void
3737
{
3838
self::assertTrue(
39-
GenomicPosition::parse('chr11:100')->equals(GenomicPosition::parse('11:100'))
39+
GenomicPosition::parseOneBased('chr11:100')->equals(GenomicPosition::parseOneBased('11:100'))
4040
);
4141
self::assertFalse(
42-
GenomicPosition::parse('chr11:100')->equals(GenomicPosition::parse('chr11:101'))
42+
GenomicPosition::parseOneBased('chr11:100')->equals(GenomicPosition::parseOneBased('chr11:101'))
4343
);
4444
self::assertFalse(
45-
GenomicPosition::parse('chr11:100')->equals(GenomicPosition::parse('chr12:100'))
45+
GenomicPosition::parseOneBased('chr11:100')->equals(GenomicPosition::parseOneBased('chr12:100'))
4646
);
4747
}
4848

49-
public function testConstructorRejectsNonPositivePosition(): void
49+
public function testConstructorRejectsZeroPosition(): void
5050
{
5151
self::expectException(\InvalidArgumentException::class);
5252
self::expectExceptionMessage('Position must be positive, got: 0.');
53-
new GenomicPosition(new Chromosome('chr1'), 0);
53+
NucleotidePosition::fromOneBased(0);
54+
}
55+
56+
public function testConstructorRejectsNegativePosition(): void
57+
{
58+
self::expectException(\InvalidArgumentException::class);
59+
self::expectExceptionMessage('Position must be positive, got: -1.');
60+
NucleotidePosition::fromOneBased(-1);
5461
}
5562

5663
/** @return iterable<array{string}> */
5764
public static function invalidFormats(): iterable
5865
{
5966
yield ['11:1test'];
6067
yield ['chr1:0'];
68+
yield ['chr1:-1'];
6169
yield ['chr1:'];
6270
yield ['chr1'];
6371
}
@@ -67,6 +75,6 @@ public static function invalidFormats(): iterable
6775
public function testParseRejectsInvalidFormat(string $value): void
6876
{
6977
self::expectException(\InvalidArgumentException::class);
70-
GenomicPosition::parse($value);
78+
GenomicPosition::parseOneBased($value);
7179
}
7280
}

0 commit comments

Comments
 (0)