diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index c25ffd40..c5c1cdf5 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -40,6 +40,7 @@ jobs: env: DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Copy repos folder for getting the extension logos run: | diff --git a/.hooks/pre-push b/.hooks/pre-push index e86a7287..1370eb3a 100755 --- a/.hooks/pre-push +++ b/.hooks/pre-push @@ -20,4 +20,7 @@ pylint $(git ls-files '*.py') echo "Running mypy.." git ls-files '*.py' | xargs --max-lines=1 mypy +echo "Running pytest.." +pytest tests/ -vs + exit 0 \ No newline at end of file diff --git a/blueos_repository/docker/auth.py b/blueos_repository/docker/auth.py index 200419a7..4d7fb04e 100644 --- a/blueos_repository/docker/auth.py +++ b/blueos_repository/docker/auth.py @@ -8,34 +8,53 @@ class DockerAuthAPI: # pylint: disable=too-few-public-methods """ - This class is used to interact with the Docker Auth API. + This class is used to interact with a Docker-compatible token authentication API. + + Supports Docker Hub (auth.docker.io), GHCR (ghcr.io), and other OCI-compliant + registries that follow the Docker token authentication specification. More details in https://distribution.github.io/distribution/spec/auth/token/ """ - __api_url: str = "https://auth.docker.io" - - def __init__(self, username: Optional[str] = None, password: Optional[str] = None, max_retries: int = 5) -> None: + def __init__( # pylint: disable=too-many-arguments,too-many-positional-arguments + self, + auth_url: str = "https://auth.docker.io", + service: str = "registry.docker.io", + username: Optional[str] = None, + password: Optional[str] = None, + max_retries: int = 5, + ) -> None: """ Constructor for the DockerAuthAPI class. Args: - username: The username to be used in the authentication (optional) - password: The password to be used in the authentication (optional) + auth_url: Base URL for the token endpoint (e.g. "https://auth.docker.io" or "https://ghcr.io"). + service: The ``service`` query parameter sent to the token endpoint. + username: The username to be used in the authentication (optional). + password: The password to be used in the authentication (optional). max_retries: The maximum number of retries to be used in case of request failure. Defaults to 5. Returns: None """ + self.__api_url: str = auth_url + self.__service: str = service self.__auth_header: Optional[str] = None if username and password: self.__auth_header = f"Basic {base64.b64encode(f'{username}:{password}'.encode()).decode()}" - elif "DOCKER_USERNAME" in os.environ and "DOCKER_PASSWORD" in os.environ: - username = os.environ["DOCKER_USERNAME"] - password = os.environ["DOCKER_PASSWORD"] - self.__auth_header = f"Basic {base64.b64encode(f'{username}:{password}'.encode()).decode()}" + elif service == "registry.docker.io": + # Docker Hub credentials from environment + if "DOCKER_USERNAME" in os.environ and "DOCKER_PASSWORD" in os.environ: + env_user = os.environ["DOCKER_USERNAME"] + env_pass = os.environ["DOCKER_PASSWORD"] + self.__auth_header = f"Basic {base64.b64encode(f'{env_user}:{env_pass}'.encode()).decode()}" + elif service == "ghcr.io": + # GitHub Container Registry – use a GitHub token when available + token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN") + if token: + self.__auth_header = f"Basic {base64.b64encode(f'token:{token}'.encode()).decode()}" self.__retry_options = aiohttp_retry.ExponentialRetry(attempts=max_retries) @@ -51,17 +70,17 @@ async def get_token(self, repo: str) -> AuthToken: """ params = { - "service": "registry.docker.io", + "service": self.__service, "scope": f"repository:{repo}:pull", } headers = {"Authorization": self.__auth_header} if self.__auth_header else {} - auth_url = f"{self.__api_url}/token?service=registry.docker.io&scope=repository:{repo}:pull" + auth_url = f"{self.__api_url}/token" async with aiohttp_retry.RetryClient(retry_options=self.__retry_options) as session: async with session.get(auth_url, params=params, headers=headers) as resp: if resp.status != 200: - error_msg = f"Error on Docker Auth API with status {resp.status}" + error_msg = f"Error on Docker Auth API ({self.__api_url}) with status {resp.status}" print(error_msg) raise Exception(error_msg) diff --git a/blueos_repository/docker/image_ref.py b/blueos_repository/docker/image_ref.py new file mode 100644 index 00000000..637aae31 --- /dev/null +++ b/blueos_repository/docker/image_ref.py @@ -0,0 +1,77 @@ +import dataclasses + + +@dataclasses.dataclass +class DockerImageRef: + """ + Parsed Docker image reference. + + Handles both Docker Hub short references (e.g. "bluerobotics/blueos-doris") + and fully-qualified references with a registry hostname + (e.g. "ghcr.io/bluerobotics/blueos-doris"). + """ + + registry: str + repository: str + + @staticmethod + def parse(docker: str) -> "DockerImageRef": + """ + Parse a Docker image reference string into registry and repository components. + + Args: + docker: The docker image reference, e.g.: + - "bluerobotics/cockpit" → docker.io / bluerobotics/cockpit + - "ghcr.io/bluerobotics/blueos-doris" → ghcr.io / bluerobotics/blueos-doris + - "docker.io/bluerobotics/cockpit" → docker.io / bluerobotics/cockpit + - "registry.example.com/org/repo" → registry.example.com / org/repo + + Returns: + A DockerImageRef with the extracted registry and repository. + """ + + # Strip any tag or digest suffix so we only deal with the image name + name = docker.split("@")[0].split(":")[0] + parts = name.split("/") + + # If the first part looks like a hostname (contains a dot or a colon, + # or is "localhost"), treat it as the registry. + if len(parts) >= 3 and ("." in parts[0] or ":" in parts[0] or parts[0] == "localhost"): + registry = parts[0] + repository = "/".join(parts[1:]) + else: + registry = "docker.io" + repository = name + + return DockerImageRef(registry=registry, repository=repository) + + @property + def is_dockerhub(self) -> bool: + return self.registry in ("docker.io", "registry-1.docker.io", "index.docker.io") + + @property + def is_ghcr(self) -> bool: + return self.registry == "ghcr.io" + + @property + def registry_url(self) -> str: + """Base URL for the Docker Registry V2 API.""" + if self.is_dockerhub: + return "https://registry-1.docker.io" + return f"https://{self.registry}" + + @property + def auth_url(self) -> str: + """Base URL for the token authentication endpoint.""" + if self.is_dockerhub: + return "https://auth.docker.io" + # GHCR (and most OCI registries) serve the token endpoint on the + # same host as the registry itself. + return f"https://{self.registry}" + + @property + def auth_service(self) -> str: + """The ``service`` parameter sent to the token endpoint.""" + if self.is_dockerhub: + return "registry.docker.io" + return self.registry diff --git a/blueos_repository/docker/models/manifest.py b/blueos_repository/docker/models/manifest.py index a4402562..e0af2662 100644 --- a/blueos_repository/docker/models/manifest.py +++ b/blueos_repository/docker/models/manifest.py @@ -48,13 +48,13 @@ class ManifestList: Attributes: schemaVersion (int): This field specifies the image manifest schema version as an integer. This schema uses the version 2. - mediaType (str): The MIME type of the manifest list. This should be set to application/vnd.docker.distribution.manifest.list.v2+json. manifests (List[Manifest]): The manifests field contains a list of manifests for specific platforms. + mediaType (Optional[str]): The MIME type of the manifest list. """ schemaVersion: int # pylint: disable=invalid-name - mediaType: str # pylint: disable=invalid-name manifests: List[Manifest] + mediaType: Optional[str] = None # pylint: disable=invalid-name @dataclasses.dataclass @@ -98,15 +98,15 @@ class ImageManifest: Attributes: schemaVersion (int): The image manifest schema version as an integer, version 2 is expected. - mediaType (str): MIME type of the manifest, expected to be application/vnd.docker.distribution.manifest.v2+json. + mediaType (Optional[str]): MIME type of the manifest. Optional per OCI spec. config (ConfigObject): The configuration object for a container by digest. layers (List[Layer]): Ordered list of layers starting from the base image. """ schemaVersion: int # pylint: disable=invalid-name - mediaType: str # pylint: disable=invalid-name config: ConfigReference layers: List[ManifestLayer] + mediaType: Optional[str] = None # pylint: disable=invalid-name # Optional per OCI spec @dataclasses.dataclass diff --git a/blueos_repository/docker/registry.py b/blueos_repository/docker/registry.py index 624d0f3d..00c571a0 100644 --- a/blueos_repository/docker/registry.py +++ b/blueos_repository/docker/registry.py @@ -1,4 +1,4 @@ -from typing import Any, Optional +from typing import Any, List, Optional import aiohttp_retry from dataclass_wizard import fromdict @@ -9,18 +9,26 @@ from docker.models.manifest import ImageManifest, ManifestFetch, ManifestList -class DockerRegistry: +class DockerRegistry: # pylint: disable=too-many-instance-attributes """ - This class is used to interact with the Docker Registry API. + This class is used to interact with a Docker-compatible Registry V2 API. + + Supports Docker Hub (registry-1.docker.io), GHCR (ghcr.io), and other + OCI-compliant registries. More details in https://distribution.github.io/distribution/spec/api/ """ - __api_base_url: str = "https://registry-1.docker.io" - __api_version: str = "v2" - __api_url: str = f"{__api_base_url}/{__api_version}" - - __token: Optional[AuthToken] = None + # Media types accepted when fetching manifests. We request every format + # the codebase can handle so the registry returns the best match. + __manifest_accept: str = ",".join( + [ + "application/vnd.docker.distribution.manifest.v2+json", + "application/vnd.docker.distribution.manifest.list.v2+json", + "application/vnd.oci.image.manifest.v1+json", + "application/vnd.oci.image.index.v1+json", + ] + ) @staticmethod def from_preview() -> "DockerRegistry": @@ -33,18 +41,35 @@ def from_preview() -> "DockerRegistry": return DockerRegistry("ratelimitpreview/test") - def __init__(self, repository: str, max_retries: int = 5) -> None: + def __init__( # pylint: disable=too-many-arguments,too-many-positional-arguments + self, + repository: str, + registry_url: str = "https://registry-1.docker.io", + auth_url: str = "https://auth.docker.io", + auth_service: str = "registry.docker.io", + max_retries: int = 5, + ) -> None: """ - Constructor for the DockerHubAPI class. + Constructor for the DockerRegistry class. Args: - repository: Repository that this registry class will operate on + repository: Repository that this registry class will operate on. + registry_url: Base URL for the registry (e.g. "https://registry-1.docker.io" or "https://ghcr.io"). + auth_url: Base URL for the token authentication endpoint. + auth_service: The ``service`` parameter used for token requests. max_retries: The maximum number of retries to be used in case of request failure. Defaults to 5. Returns: None """ + self.repository = repository + self.__api_base_url: str = registry_url + self.__auth_url: str = auth_url + self.__auth_service: str = auth_service + self.__api_version: str = "v2" + self.__api_url: str = f"{self.__api_base_url}/{self.__api_version}" + self.__token: Optional[AuthToken] = None self.__retry_options = aiohttp_retry.ExponentialRetry(attempts=max_retries) async def __check_token(self) -> None: @@ -53,7 +78,7 @@ async def __check_token(self) -> None: """ if not self.__token or self.__token.is_expired: - auth = DockerAuthAPI() + auth = DockerAuthAPI(auth_url=self.__auth_url, service=self.__auth_service) self.__token = await auth.get_token(self.repository) async def __raise_pretty(self, resp: Any) -> None: @@ -81,12 +106,12 @@ async def __raise_pretty(self, resp: Any) -> None: async def get(self, route: str, max_retries: Optional[int] = None, **kwargs: Any) -> Any: """ - Make a GET request to the Docker Hub API. + Make a GET request to the Docker Registry V2 API. Args: route: The route to be used in the request. - params: The parameters to be used in the request. max_retries: The maximum number of retries to be used in case of request failure. Defaults to None. + **kwargs: Additional keyword arguments passed to the HTTP client (e.g. headers, params). Returns: The response from the request parsed as json. @@ -121,7 +146,7 @@ async def get_manifest(self, tag_or_digest: str) -> ManifestFetch: route = f"{self.repository}/manifests/{tag_or_digest}" header = { "Authorization": f"Bearer {self.__token.token if self.__token else ''}", - "Accept": "application/vnd.docker.distribution.manifest.v2+json,application/vnd.oci.image.manifest.v1+json,application/vnd.oci.image.index.v1+json", + "Accept": self.__manifest_accept, } manifest = await self.get(route, headers=header) @@ -129,6 +154,12 @@ async def get_manifest(self, tag_or_digest: str) -> ManifestFetch: if "config" in manifest: return ManifestFetch(manifest=fromdict(ImageManifest, manifest)) + # OCI indexes (e.g. from GHCR) may contain non-image entries such as + # attestation manifests that lack a "platform" field. Filter them out + # so the typed Manifest dataclass can require platform unconditionally. + if "manifests" in manifest: + manifest["manifests"] = [m for m in manifest["manifests"] if "platform" in m] + return ManifestFetch(manifest=fromdict(ManifestList, manifest)) async def get_manifest_blob(self, digest: str) -> Blob: @@ -147,13 +178,32 @@ async def get_manifest_blob(self, digest: str) -> Blob: route = f"{self.repository}/blobs/{digest}" header = { "Authorization": f"Bearer {self.__token.token if self.__token else ''}", - "Accept": "application/vnd.docker.distribution.manifest.v2+json,application/vnd.oci.image.manifest.v1+json,application/vnd.oci.image.index.v1+json", + "Accept": "application/vnd.oci.image.config.v1+json,application/json", } blob = await self.get(route, headers=header) return fromdict(Blob, blob) + async def list_tags(self) -> List[str]: + """ + List all tags for the repository using the standard V2 tag listing API. + + This is the registry-agnostic way to discover tags; it works on any + OCI-compliant registry (Docker Hub, GHCR, Quay, etc.). + + Returns: + A list of tag name strings. + """ + + await self.__check_token() + + route = f"{self.repository}/tags/list" + header = {"Authorization": f"Bearer {self.__token.token if self.__token else ''}"} + + data = await self.get(route, headers=header) + return list(data.get("tags", []) or []) + async def get_rate_limit(self) -> RateLimit: await self.__check_token() diff --git a/blueos_repository/extension/extension.py b/blueos_repository/extension/extension.py index 68f1d6d1..46b2bfc5 100644 --- a/blueos_repository/extension/extension.py +++ b/blueos_repository/extension/extension.py @@ -5,6 +5,7 @@ import aiohttp import json5 from docker.hub import DockerHub +from docker.image_ref import DockerImageRef from docker.models.blob import Blob from docker.models.manifest import ImageManifest, ManifestFetch, ManifestPlatform from docker.models.repo import RepoInfo @@ -48,9 +49,23 @@ def __init__(self, metadata: ExtensionMetadata) -> None: # Versions self.versions: Dict[str, ExtensionVersion] = {} - # Docker API - self.hub: DockerHub = DockerHub(metadata.docker) - self.registry: DockerRegistry = DockerRegistry(metadata.docker) + # Parse docker image reference to determine registry + self.image_ref: DockerImageRef = DockerImageRef.parse(metadata.docker) + + # Docker Registry V2 API (works for any OCI-compliant registry) + self.registry: DockerRegistry = DockerRegistry( + self.image_ref.repository, + registry_url=self.image_ref.registry_url, + auth_url=self.image_ref.auth_url, + auth_service=self.image_ref.auth_service, + ) + + # Docker Hub has a proprietary REST API that provides richer tag + # metadata (ordering, pull counts, per-image details). For other + # registries we fall back to the standard V2 tag list on self.registry. + self.docker_hub: Optional[DockerHub] = ( + DockerHub(self.image_ref.repository) if self.image_ref.is_dockerhub else None + ) @property def sorted_versions(self) -> Dict[str, ExtensionVersion]: @@ -80,14 +95,15 @@ async def process_readme_md(readme: str, resources_url: str) -> str: encoder = MarkdownImageEncoder(readme, resources_url) return str(await encoder.get_processed_markdown()) - def __extract_images_from_tag(self, tag: Tag) -> List[Image]: + @staticmethod + def __extract_images_from_tag(tag: Tag) -> List[Image]: active_images = [ image for image in tag.images if (image.status == "active" and image.architecture != "unknown" and image.os != "unknown") ] - images = [ + return [ Image( digest=image.digest if image.digest else None, expanded_size=image.size, @@ -99,7 +115,51 @@ def __extract_images_from_tag(self, tag: Tag) -> List[Image]: ) for image in active_images ] - return images + + @staticmethod + def __extract_images_from_manifest(manifest_fetch: ManifestFetch, blob: Blob) -> List[Image]: + """ + Derive per-platform image information directly from a registry manifest. + + This is the registry-agnostic path used when the Docker Hub tag API is + not available (e.g. GHCR, Quay, or any other OCI registry). + + Args: + manifest_fetch: The fetched manifest (may be a single image or a manifest list / OCI index). + blob: The blob config for the embedded (ARM) image — used for + platform info when the manifest is a single image. + + Returns: + List of Image objects. + """ + + if isinstance(manifest_fetch.manifest, ImageManifest): + return [ + Image( + digest=manifest_fetch.manifest.config.digest, + expanded_size=sum(layer.size for layer in manifest_fetch.manifest.layers), + platform=Platform( + architecture=blob.architecture or "unknown", + variant=None, + os=blob.os or "unknown", + ), + ) + ] + + # ManifestList / OCI Index — entry.size is the manifest document + # size, NOT the image layer size, so we report 0 (unknown) instead. + return [ + Image( + digest=entry.digest, + expanded_size=0, + platform=Platform( + architecture=entry.platform.architecture, + variant=entry.platform.variant if entry.platform.variant else None, + os=entry.platform.os if entry.platform.os else None, + ), + ) + for entry in manifest_fetch.manifest.manifests + ] def __is_compatible(self, platform: ManifestPlatform) -> bool: """ @@ -150,7 +210,25 @@ async def __extract_valid_embedded_digest(self, fetch: ManifestFetch) -> str: raise RuntimeError(f"Expected to have a valid image manifest but got a manifest list: {manifest_fetch}") - async def __create_version_from_tag_blob(self, version_tag: Tag, blob: Blob) -> ExtensionVersion: + async def __create_version( # pylint: disable=too-many-locals + self, + tag_name: str, + blob: Blob, + manifest: ManifestFetch, + hub_tag: Optional[Tag] = None, + ) -> ExtensionVersion: + """ + Build an :class:`ExtensionVersion` from the blob labels, manifest, and + (optionally) Docker Hub tag metadata. + + Args: + tag_name: The semver tag string (e.g. ``"1.2.3"``). + blob: The config blob for the embedded ARM image. + manifest: The top-level manifest fetch for this tag. + hub_tag: If available, the Docker Hub ``Tag`` object that provides + rich per-image metadata (sizes, architecture, status). + """ + labels = blob.config.Labels authors = labels.get("authors", "[]") @@ -163,7 +241,7 @@ async def __create_version_from_tag_blob(self, version_tag: Tag, blob: Blob) -> readme = labels.get("readme", None) if readme is not None: - url = readme.replace(r"{tag}", version_tag.name) + url = readme.replace(r"{tag}", tag_name) try: readme = await Extension.fetch_readme(url) try: @@ -175,17 +253,23 @@ async def __create_version_from_tag_blob(self, version_tag: Tag, blob: Blob) -> Logger.warning(self.identifier, str(error)) readme = "No README available" - images = self.__extract_images_from_tag(version_tag) + # Prefer Docker Hub's rich per-image data when available; otherwise + # derive the image list from the manifest (works for any registry). + images: List[Image] = [] + if hub_tag: + images = self.__extract_images_from_tag(hub_tag) + if not images: + images = self.__extract_images_from_manifest(manifest, blob) if not images: Logger.error( self.identifier, - f"Could not find images associated with tag {version_tag.name} for extension {self.identifier}", + f"Could not find images associated with tag {tag_name} for extension {self.identifier}", ) - tag_identifier = str(uuid.uuid5(uuid.NAMESPACE_DNS, f"{self.identifier}.{version_tag.name}")) + tag_identifier = str(uuid.uuid5(uuid.NAMESPACE_DNS, f"{self.identifier}.{tag_name}")) return ExtensionVersion( identifier=tag_identifier, - tag=version_tag.name, + tag=tag_name, type=ExtensionType(labels.get("type", ExtensionType.OTHER.value)), website=links.pop("website", labels.get("website", None)), readme=readme, @@ -197,19 +281,19 @@ async def __create_version_from_tag_blob(self, version_tag: Tag, blob: Blob) -> docs=json5.loads(docs_raw) if docs_raw else None, company=json5.loads(company_raw) if company_raw else None, permissions=json5.loads(permissions_raw) if permissions_raw else None, - images=self.__extract_images_from_tag(version_tag), + images=images, ) - async def __process_tag_version(self, tag: Tag) -> None: + async def __process_tag(self, tag_name: str, hub_tag: Optional[Tag] = None) -> None: """ - Process a tag and create a version object for it and store it in the versions - dictionary property. + Fetch the manifest and blob for *tag_name*, build an + :class:`ExtensionVersion`, and store it in ``self.versions``. Args: - tag (Tag): Tag to process. + tag_name: The tag string to process. + hub_tag: Optional Docker Hub ``Tag`` object for richer metadata. """ - tag_name = tag.name try: if not valid_semver(tag_name): raise ValueError(f"Invalid version naming: {tag_name}") @@ -220,7 +304,7 @@ async def __process_tag_version(self, tag: Tag) -> None: embedded_digest = await self.__extract_valid_embedded_digest(manifest) blob = await self.registry.get_manifest_blob(embedded_digest) - self.versions[tag_name] = await self.__create_version_from_tag_blob(tag, blob) + self.versions[tag_name] = await self.__create_version(tag_name, blob, manifest, hub_tag) Logger.info(self.identifier, f"Generated version entry {tag_name} for extension {self.identifier}") except ValueError as error: @@ -251,15 +335,20 @@ async def inflate(self, tag: Optional[Tag] = None) -> None: """ if tag: - return await self.__process_tag_version(tag) + return await self.__process_tag(tag.name, hub_tag=tag) try: - tags = await self.hub.get_tags() - self.repo_info = await self.hub.repo_info() + if self.docker_hub: + # Docker Hub: use its proprietary API for ordered results, + # rich per-image metadata, and download stats. + hub_tags = await self.docker_hub.get_tags() + self.repo_info = await self.docker_hub.repo_info() + + await asyncio.gather(*(self.__process_tag(t.name, hub_tag=t) for t in hub_tags.results)) + else: + # Any other OCI registry: use the standard V2 tag list. + tag_names = await self.registry.list_tags() + + await asyncio.gather(*(self.__process_tag(name) for name in tag_names)) except Exception as error: # pylint: disable=broad-except Logger.error(self.identifier, f"Unable to fetch tags for {self.identifier}, error: {error}") - return - - # We may want to split and process first 5 tags prior to make sure we dont reach limit and always have the - # latest ones processed. - await asyncio.gather(*(self.__process_tag_version(tag) for tag in tags.results)) diff --git a/pyproject.toml b/pyproject.toml index cbd6812f..a7488fa4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,8 @@ dev = [ "mypy==1.15.0", "pydantic==1.9.0", "pylint==3.3.7", + "pytest>=8.0", + "pytest-asyncio>=0.24", "types-Markdown==3.6.0.20240316", ] @@ -38,6 +40,9 @@ packages = [ "blueos_repository.extension" ] +[tool.pytest.ini_options] +asyncio_mode = "auto" + [tool.black] line-length = 120 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..1f68ff50 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,7 @@ +import sys +from pathlib import Path + +# The project's internal imports are relative to blueos_repository/ +# (e.g. ``from docker.auth import DockerAuthAPI``). Add it to sys.path +# so pytest can resolve these the same way the consolidation script does. +sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "blueos_repository")) diff --git a/tests/test_registries.py b/tests/test_registries.py new file mode 100644 index 00000000..00bc6ee0 --- /dev/null +++ b/tests/test_registries.py @@ -0,0 +1,252 @@ +""" +Integration tests for Docker Hub and GHCR registry support. + +These tests hit real registry APIs (no mocking) using two public repositories: + - Docker Hub: bluerobotics/cockpit + - GHCR: ghcr.io/bluerobotics/blueos-doris +""" + +# pylint: disable=redefined-outer-name + +import pytest +from docker.auth import DockerAuthAPI +from docker.image_ref import DockerImageRef +from docker.models.manifest import ImageManifest, ManifestList +from docker.registry import DockerRegistry +from extension.extension import Extension +from extension.models import ExtensionMetadata + +DOCKERHUB_IMAGE = "bluerobotics/cockpit" +GHCR_IMAGE = "ghcr.io/bluerobotics/blueos-doris" + + +@pytest.fixture +def dockerhub_ref() -> DockerImageRef: + return DockerImageRef.parse(DOCKERHUB_IMAGE) + + +@pytest.fixture +def ghcr_ref() -> DockerImageRef: + return DockerImageRef.parse(GHCR_IMAGE) + + +@pytest.fixture +def dockerhub_registry(dockerhub_ref: DockerImageRef) -> DockerRegistry: + return DockerRegistry( + dockerhub_ref.repository, + registry_url=dockerhub_ref.registry_url, + auth_url=dockerhub_ref.auth_url, + auth_service=dockerhub_ref.auth_service, + ) + + +@pytest.fixture +def ghcr_registry(ghcr_ref: DockerImageRef) -> DockerRegistry: + return DockerRegistry( + ghcr_ref.repository, + registry_url=ghcr_ref.registry_url, + auth_url=ghcr_ref.auth_url, + auth_service=ghcr_ref.auth_service, + ) + + +class TestDockerImageRef: + def test_parse_dockerhub_short(self) -> None: + ref = DockerImageRef.parse("bluerobotics/cockpit") + assert ref.registry == "docker.io" + assert ref.repository == "bluerobotics/cockpit" + assert ref.is_dockerhub is True + assert ref.is_ghcr is False + + def test_parse_dockerhub_explicit(self) -> None: + ref = DockerImageRef.parse("docker.io/bluerobotics/cockpit") + assert ref.registry == "docker.io" + assert ref.repository == "bluerobotics/cockpit" + assert ref.is_dockerhub is True + + def test_parse_ghcr(self) -> None: + ref = DockerImageRef.parse("ghcr.io/bluerobotics/blueos-doris") + assert ref.registry == "ghcr.io" + assert ref.repository == "bluerobotics/blueos-doris" + assert ref.is_ghcr is True + assert ref.is_dockerhub is False + + def test_parse_strips_tag(self) -> None: + ref = DockerImageRef.parse("ghcr.io/bluerobotics/blueos-doris:0.0.1") + assert ref.repository == "bluerobotics/blueos-doris" + + def test_parse_strips_digest(self) -> None: + ref = DockerImageRef.parse("ghcr.io/org/repo@sha256:abcdef1234567890") + assert ref.repository == "org/repo" + + def test_registry_url_dockerhub(self, dockerhub_ref: DockerImageRef) -> None: + assert dockerhub_ref.registry_url == "https://registry-1.docker.io" + + def test_registry_url_ghcr(self, ghcr_ref: DockerImageRef) -> None: + assert ghcr_ref.registry_url == "https://ghcr.io" + + def test_auth_url_dockerhub(self, dockerhub_ref: DockerImageRef) -> None: + assert dockerhub_ref.auth_url == "https://auth.docker.io" + + def test_auth_url_ghcr(self, ghcr_ref: DockerImageRef) -> None: + assert ghcr_ref.auth_url == "https://ghcr.io" + + def test_auth_service_dockerhub(self, dockerhub_ref: DockerImageRef) -> None: + assert dockerhub_ref.auth_service == "registry.docker.io" + + def test_auth_service_ghcr(self, ghcr_ref: DockerImageRef) -> None: + assert ghcr_ref.auth_service == "ghcr.io" + + +class TestDockerAuth: + async def test_dockerhub_token(self, dockerhub_ref: DockerImageRef) -> None: + auth = DockerAuthAPI(auth_url=dockerhub_ref.auth_url, service=dockerhub_ref.auth_service) + token = await auth.get_token(dockerhub_ref.repository) + assert token.token is not None + assert len(token.token) > 0 + assert token.is_expired is False + + async def test_ghcr_token(self, ghcr_ref: DockerImageRef) -> None: + auth = DockerAuthAPI(auth_url=ghcr_ref.auth_url, service=ghcr_ref.auth_service) + token = await auth.get_token(ghcr_ref.repository) + assert token.token is not None + assert len(token.token) > 0 + assert token.is_expired is False + + +class TestDockerRegistryTagList: + async def test_dockerhub_list_tags(self, dockerhub_registry: DockerRegistry) -> None: + tags = await dockerhub_registry.list_tags() + assert isinstance(tags, list) + assert len(tags) > 0 + # cockpit has semver tags + assert any("." in tag for tag in tags) + + async def test_ghcr_list_tags(self, ghcr_registry: DockerRegistry) -> None: + tags = await ghcr_registry.list_tags() + assert isinstance(tags, list) + assert len(tags) > 0 + assert any("." in tag for tag in tags) + + +class TestDockerRegistryManifest: + async def test_dockerhub_get_manifest(self, dockerhub_registry: DockerRegistry) -> None: + tags = await dockerhub_registry.list_tags() + # Pick a semver-looking tag + semver_tags = [t for t in tags if "." in t and t[0].isdigit()] + assert len(semver_tags) > 0, "Expected at least one semver tag for cockpit" + + fetch = await dockerhub_registry.get_manifest(semver_tags[0]) + manifest = fetch.manifest + + # Could be a single image or a manifest list + assert isinstance(manifest, (ImageManifest, ManifestList)) + + if isinstance(manifest, ManifestList): + assert len(manifest.manifests) > 0 + # All entries must have platform (attestation entries filtered) + for entry in manifest.manifests: + assert entry.platform is not None + assert entry.platform.architecture + assert entry.platform.os + + async def test_ghcr_get_manifest(self, ghcr_registry: DockerRegistry) -> None: + tags = await ghcr_registry.list_tags() + semver_tags = [t for t in tags if "." in t and t[0].isdigit()] + assert len(semver_tags) > 0, "Expected at least one semver tag for blueos-doris" + + fetch = await ghcr_registry.get_manifest(semver_tags[0]) + manifest = fetch.manifest + + assert isinstance(manifest, (ImageManifest, ManifestList)) + + if isinstance(manifest, ManifestList): + assert len(manifest.manifests) > 0 + # Attestation manifests must have been filtered — all entries have platform + for entry in manifest.manifests: + assert entry.platform is not None + assert entry.platform.architecture + assert entry.platform.os + + +class TestDockerRegistryBlob: + async def test_dockerhub_get_blob(self, dockerhub_registry: DockerRegistry) -> None: + tags = await dockerhub_registry.list_tags() + semver_tags = [t for t in tags if "." in t and t[0].isdigit()] + fetch = await dockerhub_registry.get_manifest(semver_tags[0]) + + # Resolve to a single-image manifest to get config digest + if isinstance(fetch.manifest, ManifestList): + entry = fetch.manifest.manifests[0] + fetch = await dockerhub_registry.get_manifest(entry.digest) + + assert isinstance(fetch.manifest, ImageManifest) + blob = await dockerhub_registry.get_manifest_blob(fetch.manifest.config.digest) + + assert blob.config is not None + assert isinstance(blob.config.Labels, dict) + assert blob.architecture is not None + + async def test_ghcr_get_blob(self, ghcr_registry: DockerRegistry) -> None: + tags = await ghcr_registry.list_tags() + semver_tags = [t for t in tags if "." in t and t[0].isdigit()] + fetch = await ghcr_registry.get_manifest(semver_tags[0]) + + if isinstance(fetch.manifest, ManifestList): + entry = fetch.manifest.manifests[0] + fetch = await ghcr_registry.get_manifest(entry.digest) + + assert isinstance(fetch.manifest, ImageManifest) + blob = await ghcr_registry.get_manifest_blob(fetch.manifest.config.digest) + + assert blob.config is not None + assert isinstance(blob.config.Labels, dict) + assert blob.architecture is not None + + +class TestExtensionInflate: + @staticmethod + def _make_metadata(docker: str, identifier: str) -> ExtensionMetadata: + return ExtensionMetadata( + identifier=identifier, + name=identifier, + website="https://example.com", + docker=docker, + description="Test extension", + ) + + async def test_dockerhub_extension_inflate(self) -> None: + metadata = self._make_metadata(DOCKERHUB_IMAGE, "test.cockpit") + ext = Extension(metadata) + + assert ext.docker_hub is not None + assert ext.image_ref.is_dockerhub is True + + await ext.inflate() + + assert len(ext.versions) > 0, "Expected at least one valid version from Docker Hub" + + for tag_name, version in ext.versions.items(): + assert "." in tag_name # semver + assert len(version.images) > 0 + for image in version.images: + assert image.platform is not None + assert image.platform.architecture + + async def test_ghcr_extension_inflate(self) -> None: + metadata = self._make_metadata(GHCR_IMAGE, "test.doris") + ext = Extension(metadata) + + assert ext.docker_hub is None + assert ext.image_ref.is_ghcr is True + + await ext.inflate() + + assert len(ext.versions) > 0, "Expected at least one valid version from GHCR" + + for tag_name, version in ext.versions.items(): + assert "." in tag_name # semver + assert len(version.images) > 0 + for image in version.images: + assert image.platform is not None + assert image.platform.architecture