Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,14 @@ requires-python = ">=3.11"

dependencies = [
"agct~=0.1.0",
"requests",
"biopython",
"tqdm",
"cdot",
"click",
"cool-seq-tool==0.4.0.dev3",
"ga4gh.vrs==2.0.0-a6",
"gene_normalizer[etl,pg]==0.3.0-dev2",
"httpx~=0.28",
"pydantic>=2",
"python-dotenv",
"setuptools>=68.0", # tmp -- ensure 3.12 compatibility
Expand All @@ -61,7 +61,7 @@ tests = [
"pytest-mock",
"pytest-cov",
"pytest-asyncio",
"requests-mock"
"respx"
]
dev = [
"ruff==0.2.0",
Expand Down
15 changes: 13 additions & 2 deletions src/api/routers/map.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from cool_seq_tool.schemas import AnnotationLayer
from fastapi import APIRouter, HTTPException
from fastapi.responses import JSONResponse
from requests import HTTPError
from httpx import HTTPStatusError

from dcd_mapping.align import build_alignment_result
from dcd_mapping.annotate import (
Expand Down Expand Up @@ -64,6 +64,7 @@ async def map_scoreset(urn: str, store_path: Path | None = None) -> JSONResponse
records = get_scoreset_records(metadata, True, store_path)
metadata = patch_target_sequence_type(metadata, records, force=False)
except ScoresetNotSupportedError as e:
_logger.error("Scoreset not supported for %s: %s", urn, e)
return JSONResponse(
content=ScoresetMapping(
metadata=None,
Expand All @@ -72,6 +73,7 @@ async def map_scoreset(urn: str, store_path: Path | None = None) -> JSONResponse
)
except ResourceAcquisitionError as e:
msg = f"Unable to acquire resource from MaveDB: {e}"
_logger.error(msg)
raise HTTPException(status_code=500, detail=msg) from e

if not records:
Expand All @@ -87,17 +89,21 @@ async def map_scoreset(urn: str, store_path: Path | None = None) -> JSONResponse
alignment_results = build_alignment_result(metadata, True)
except BlatNotFoundError as e:
msg = "BLAT command appears missing. Ensure it is available on the $PATH or use the environment variable BLAT_BIN_PATH to point to it. See instructions in the README prerequisites section for more."
_logger.error("BLAT not found for %s: %s", urn, e)
raise HTTPException(status_code=500, detail=msg) from e
except ResourceAcquisitionError as e:
msg = f"BLAT resource could not be acquired: {e}"
_logger.error(msg)
raise HTTPException(status_code=500, detail=msg) from e
except AlignmentError as e:
_logger.error("Alignment error for %s: %s", urn, e)
return JSONResponse(
content=ScoresetMapping(
metadata=metadata, error_message=str(e).strip("'")
).model_dump(exclude_none=True)
)
except ScoresetNotSupportedError as e:
_logger.error("Scoreset not supported during alignment for %s: %s", urn, e)
return JSONResponse(
content=ScoresetMapping(
metadata=metadata, error_message=str(e).strip("'")
Expand All @@ -111,11 +117,13 @@ async def map_scoreset(urn: str, store_path: Path | None = None) -> JSONResponse
# on the target level and on the variant level for variants relative to that target
# HTTPStatusErrors and DataLookupErrors cause the mapping process to exit because these indicate
# underlying issues with data providers.
except HTTPError as e:
except HTTPStatusError as e:
msg = f"HTTP error occurred during transcript selection: {e}"
_logger.error(msg)
raise HTTPException(status_code=500, detail=msg) from e
except DataLookupError as e:
msg = f"Data lookup error occurred during transcript selection: {e}"
_logger.error(msg)
raise HTTPException(status_code=500, detail=msg) from e

vrs_results = {}
Expand All @@ -134,6 +142,7 @@ async def map_scoreset(urn: str, store_path: Path | None = None) -> JSONResponse
UnsupportedReferenceSequencePrefixError,
MissingSequenceIdError,
) as e:
_logger.error("VRS mapping error for %s: %s", urn, e)
return JSONResponse(
content=ScoresetMapping(
metadata=metadata, error_message=str(e).strip("'")
Expand Down Expand Up @@ -172,6 +181,7 @@ async def map_scoreset(urn: str, store_path: Path | None = None) -> JSONResponse
VrsVersion.V_2,
)
except Exception as e:
_logger.error("Unexpected error during annotation for %s: %s", urn, e)
return JSONResponse(
content=ScoresetMapping(
metadata=metadata, error_message=str(e).strip("'")
Expand Down Expand Up @@ -287,6 +297,7 @@ async def map_scoreset(urn: str, store_path: Path | None = None) -> JSONResponse
del reference_sequences[target_gene].layers[layer]

except Exception as e:
_logger.error("Unexpected error during result assembly for %s: %s", urn, e)
return JSONResponse(
content=ScoresetMapping(
metadata=metadata, error_message=str(e).strip("'")
Expand Down
8 changes: 4 additions & 4 deletions src/dcd_mapping/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pathlib import Path
from urllib.parse import urlparse

import requests
import httpx
from Bio.SearchIO import HSP
from Bio.SearchIO import parse as parse_blat
from Bio.SearchIO._model import Hit, QueryResult
Expand Down Expand Up @@ -84,7 +84,7 @@ def get_ref_genome_file(
if not genome_file.exists():
try:
http_download(url, genome_file, silent)
except requests.HTTPError as e:
except httpx.HTTPStatusError as e:
msg = f"HTTPError when fetching reference genome file from {url}"
_logger.error(msg)
raise ResourceAcquisitionError(msg) from e
Expand Down Expand Up @@ -378,11 +378,11 @@ def fetch_alignment(
alignment_results[accession_id] = None
else:
url = f"{CDOT_URL}/transcript/{accession_id}"
r = requests.get(url, timeout=30)
r = httpx.get(url, timeout=30)

try:
r.raise_for_status()
except requests.HTTPError as e:
except httpx.HTTPStatusError as e:
msg = f"Received HTTPError from {url} for scoreset {metadata.urn}"
_logger.error(msg)
raise ResourceAcquisitionError(msg) from e
Expand Down
6 changes: 3 additions & 3 deletions src/dcd_mapping/lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
from typing import Any

import hgvs
import httpx
import polars as pl
import requests
from biocommons.seqrepo import SeqRepo
from biocommons.seqrepo.seqaliasdb.seqaliasdb import sqlite3
from cdot.hgvs.dataproviders import ChainedSeqFetcher, FastaSeqFetcher, RESTDataProvider
Expand Down Expand Up @@ -682,7 +682,7 @@ def get_overlapping_features_for_region(
url, headers={"Content-Type": "application/json"}
)
response.raise_for_status()
except requests.RequestException as e:
except httpx.HTTPError as e:
_logger.error(
"Failed to fetch overlapping features for region %s-%s on chromosome %s: %s",
start,
Expand Down Expand Up @@ -715,7 +715,7 @@ def get_uniprot_sequence(uniprot_id: str) -> str | None:
:raise httpx.HTTPStatusError: if response comes with an HTTP error code
"""
url = f"https://www.ebi.ac.uk/proteins/api/proteins?accession={uniprot_id.split(':')[1]}&format=json"
response = requests.get(url, timeout=30)
response = httpx.get(url, timeout=30)
response.raise_for_status()
json = response.json()
return json[0]["sequence"]["sequence"]
4 changes: 2 additions & 2 deletions src/dcd_mapping/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pathlib import Path

import click
from requests import HTTPError
from httpx import HTTPStatusError

from dcd_mapping.align import build_alignment_result
from dcd_mapping.annotate import (
Expand Down Expand Up @@ -205,7 +205,7 @@ async def map_scoreset(
# on the target level and on the variant level for variants relative to that target
# HTTPStatusErrors and DataLookupErrors cause the mapping process to exit because these indicate
# underlying issues with data providers.
except HTTPError as e:
except HTTPStatusError as e:
_emit_info(
f"HTTP error occurred during transcript selection: {e}",
silent,
Expand Down
14 changes: 7 additions & 7 deletions src/dcd_mapping/mavedb_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from pathlib import Path
from typing import Any

import requests
import httpx
from fastapi import HTTPException
from pydantic import ValidationError

Expand Down Expand Up @@ -57,7 +57,7 @@ def get_scoreset_urns() -> set[str]:

:return: set of URN strings
"""
r = requests.get(
r = httpx.get(
f"{MAVEDB_BASE_URL}/api/v1/experiments/",
timeout=30,
headers=authentication_header(),
Expand Down Expand Up @@ -101,14 +101,14 @@ def get_human_urns() -> list[str]:
scoreset_urns = get_scoreset_urns()
human_scoresets: list[str] = []
for urn in scoreset_urns:
r = requests.get(
r = httpx.get(
f"{MAVEDB_BASE_URL}/api/v1/score-sets/{urn}",
timeout=30,
headers=authentication_header(),
)
try:
r.raise_for_status()
except requests.exceptions.HTTPError:
except httpx.HTTPStatusError:
_logger.info("Unable to retrieve scoreset data for URN %s", urn)
continue
data = r.json()
Expand Down Expand Up @@ -156,10 +156,10 @@ def get_raw_scoreset_metadata(
metadata_file = dcd_mapping_dir / f"{scoreset_urn}_metadata.json"
if not metadata_file.exists():
url = f"{MAVEDB_BASE_URL}/api/v1/score-sets/{scoreset_urn}"
r = requests.get(url, timeout=30, headers=authentication_header())
r = httpx.get(url, timeout=30, headers=authentication_header())
try:
r.raise_for_status()
except requests.HTTPError as e:
except httpx.HTTPStatusError as e:
msg = f"Received HTTPError from {url} for scoreset {scoreset_urn}"
_logger.error(msg)
raise ResourceAcquisitionError(msg) from e
Expand Down Expand Up @@ -318,7 +318,7 @@ def get_scoreset_records(
url = f"{MAVEDB_BASE_URL}/api/v1/score-sets/{metadata.urn}/scores"
try:
http_download(url, scores_csv, silent)
except requests.HTTPError as e:
except httpx.HTTPStatusError as e:
msg = f"HTTPError when fetching scores CSV from {url}"
_logger.error(msg)
raise ResourceAcquisitionError(msg) from e
Expand Down
20 changes: 9 additions & 11 deletions src/dcd_mapping/resource_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from pathlib import Path

import click
import requests
import httpx
from tqdm import tqdm

_logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -71,13 +71,11 @@ def http_download(url: str, out_path: Path, silent: bool = True) -> Path:
:param out_path: location to save file to
:param silent: show TQDM progress bar if true
:return: Path if download successful
:raise requests.HTTPError: if request is unsuccessful
:raise httpx.HTTPStatusError: if request is unsuccessful
"""
if not silent:
click.echo(f"Downloading {out_path.name} to {out_path.parents[0].absolute()}")
with requests.get(
url, stream=True, timeout=60, headers=authentication_header()
) as r:
with httpx.stream("GET", url, timeout=60, headers=authentication_header()) as r:
r.raise_for_status()
total_size = int(r.headers.get("content-length", 0))
with out_path.open("wb") as h:
Expand All @@ -89,20 +87,20 @@ def http_download(url: str, out_path: Path, silent: bool = True) -> Path:
desc=out_path.name,
ncols=80,
) as progress_bar:
for chunk in r.iter_content(chunk_size=8192):
for chunk in r.iter_bytes(chunk_size=8192):
if chunk:
h.write(chunk)
progress_bar.update(len(chunk))
else:
for chunk in r.iter_content(chunk_size=8192):
for chunk in r.iter_bytes(chunk_size=8192):
if chunk:
h.write(chunk)
return out_path


def request_with_backoff(
url: str, max_retries: int = 5, backoff_factor: float = 0.3, **kwargs
) -> requests.Response:
) -> httpx.Response:
"""HTTP GET with exponential backoff only for retryable errors.

Retries on:
Expand All @@ -115,9 +113,9 @@ def request_with_backoff(
attempt = 0
while attempt < max_retries:
try:
kwargs.setdefault("timeout", 60) # Default timeout of 10 seconds
response = requests.get(url, **kwargs) # noqa: S113
except (requests.Timeout, requests.ConnectionError):
kwargs.setdefault("timeout", 60)
response = httpx.get(url, **kwargs)
except (httpx.TimeoutException, httpx.ConnectError):
# Retry on transient network failures
if attempt == max_retries - 1:
raise
Expand Down
4 changes: 2 additions & 2 deletions tests/test_lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from unittest.mock import patch

import requests
import httpx

from dcd_mapping.lookup import get_overlapping_features_for_region

Expand Down Expand Up @@ -95,7 +95,7 @@ def __init__(self):

def raise_for_status(self):
msg = f"HTTP {self.status_code} Error"
raise requests.RequestException(msg)
raise httpx.HTTPError(msg)

with (
patch("dcd_mapping.lookup.request_with_backoff", return_value=ErrorResponse()),
Expand Down
22 changes: 10 additions & 12 deletions tests/test_mavedb_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import shutil
from pathlib import Path

import httpx
import pytest
import requests_mock
import respx

from dcd_mapping.mavedb_data import get_scoreset_metadata, get_scoreset_records

Expand Down Expand Up @@ -32,10 +33,9 @@ def test_get_scoreset_metadata(
resources_data_dir: Path, scoreset_metadata_response: dict
):
urn = "urn:mavedb:00000093-a-1"
with requests_mock.Mocker() as m:
m.get(
f"https://api.mavedb.org/api/v1/score-sets/{urn}",
json=scoreset_metadata_response[urn],
with respx.mock:
respx.get(f"https://api.mavedb.org/api/v1/score-sets/{urn}").mock(
return_value=httpx.Response(200, json=scoreset_metadata_response[urn])
)
scoreset_metadata = get_scoreset_metadata(
urn, dcd_mapping_dir=resources_data_dir
Expand All @@ -62,17 +62,15 @@ def test_get_scoreset_records(
urn = "urn:mavedb:00000093-a-1"
with (fixture_data_dir / f"{urn}_scores.csv").open() as f:
scores_csv_text = f.read()
with requests_mock.Mocker() as m:
m.get(
f"https://api.mavedb.org/api/v1/score-sets/{urn}",
json=scoreset_metadata_response[urn],
with respx.mock:
respx.get(f"https://api.mavedb.org/api/v1/score-sets/{urn}").mock(
return_value=httpx.Response(200, json=scoreset_metadata_response[urn])
)
scoreset_metadata = get_scoreset_metadata(
urn, dcd_mapping_dir=resources_data_dir
)
m.get(
f"https://api.mavedb.org/api/v1/score-sets/{urn}/scores",
text=scores_csv_text,
respx.get(f"https://api.mavedb.org/api/v1/score-sets/{urn}/scores").mock(
return_value=httpx.Response(200, text=scores_csv_text)
)
scoreset_records = get_scoreset_records(
scoreset_metadata, dcd_mapping_dir=resources_data_dir
Expand Down
Loading