merge main

satvshr · satvshr · commit edde570e959e · 2026-03-27T02:23:04.000+05:30
diff --git a/openml/_api/resources/base/resources.py b/openml/_api/resources/base/resources.py
@@ -12,7 +12,8 @@
 if TYPE_CHECKING:
     import pandas as pd
 
-    from openml import OpenMLEvaluation
+    from openml.estimation_procedures import OpenMLEstimationProcedure
+    from openml.evaluations import OpenMLEvaluation
     from openml.flows.flow import OpenMLFlow
     from openml.setups.setup import OpenMLSetup
     from openml.tasks.task import OpenMLTask, TaskType
@@ -87,6 +88,9 @@ class EstimationProcedureAPI(ResourceAPI):
 
     resource_type: ResourceType = ResourceType.ESTIMATION_PROCEDURE
 
+    @abstractmethod
+    def list(self) -> list[OpenMLEstimationProcedure]: ...
+
 
 class EvaluationAPI(ResourceAPI):
     """Abstract API interface for evaluation resources."""
diff --git a/openml/_api/resources/estimation_procedure.py b/openml/_api/resources/estimation_procedure.py
@@ -1,11 +1,84 @@
 from __future__ import annotations
 
+import warnings
+
+import xmltodict
+
+from openml.estimation_procedures.estimation_procedure import OpenMLEstimationProcedure
+from openml.tasks.task import TaskType
+
 from .base import EstimationProcedureAPI, ResourceV1API, ResourceV2API
 
 
 class EstimationProcedureV1API(ResourceV1API, EstimationProcedureAPI):
-    """Version 1 API implementation for estimation procedure resources."""
+    """V1 API implementation for estimation procedures.
+
+    Fetches estimation procedures from the v1 XML API endpoint.
+    """
+
+    def list(self) -> list[OpenMLEstimationProcedure]:
+        """Return a list of all estimation procedures which are on OpenML.
+
+        Returns
+        -------
+        procedures : list of OpenMLEstimationProcedure
+            A list of all estimation procedures. Every procedure carries the
+            following information: id, task type id, name, type. Entries that
+            raise a ValueError during conversion are skipped with a warning.
+        """
+        path = "estimationprocedure/list"
+        response = self._http.get(path)
+
+        # force_list: a lone <oml:estimationprocedure> must still parse to a list
+        procs_dict = xmltodict.parse(response.text, force_list=("oml:estimationprocedure",))
+
+        # Minimalistic check if the XML is useful
+        if "oml:estimationprocedures" not in procs_dict:
+            raise ValueError("Error in return XML, does not contain tag oml:estimationprocedures.")
+
+        if "@xmlns:oml" not in procs_dict["oml:estimationprocedures"]:
+            raise ValueError(
+                "Error in return XML, does not contain tag "
+                "@xmlns:oml as a child of oml:estimationprocedures.",
+            )
+
+        if procs_dict["oml:estimationprocedures"]["@xmlns:oml"] != "http://openml.org/openml":
+            raise ValueError(
+                "Error in return XML, value of "
+                "oml:estimationprocedures/@xmlns:oml is not "
+                "http://openml.org/openml, but {}".format(
+                    str(procs_dict["oml:estimationprocedures"]["@xmlns:oml"])
+                ),
+            )
+
+        procs: list[OpenMLEstimationProcedure] = []
+        for proc_ in procs_dict["oml:estimationprocedures"]["oml:estimationprocedure"]:
+            task_type_int = int(proc_["oml:ttid"])
+            try:
+                task_type_id = TaskType(task_type_int)
+                procs.append(
+                    OpenMLEstimationProcedure(
+                        id=int(proc_["oml:id"]),
+                        task_type_id=task_type_id,
+                        name=proc_["oml:name"],
+                        type=proc_["oml:type"],
+                    )
+                )
+            except ValueError as e:
+                warnings.warn(
+                    f"Could not create task type id for {task_type_int} due to error {e}",
+                    RuntimeWarning,
+                    stacklevel=2,
+                )
+
+        return procs
 
 
 class EstimationProcedureV2API(ResourceV2API, EstimationProcedureAPI):
-    """Version 2 API implementation for estimation procedure resources."""
+    """V2 API implementation for estimation procedures.
+
+    Listing estimation procedures is not supported by the v2 API.
+    """
+
+    def list(self) -> list[OpenMLEstimationProcedure]:
+        self._not_supported(method="list")
diff --git a/openml/estimation_procedures/__init__.py b/openml/estimation_procedures/__init__.py
@@ -0,0 +1,5 @@
+# License: BSD 3-Clause
+
+from .estimation_procedure import OpenMLEstimationProcedure
+
+__all__ = ["OpenMLEstimationProcedure"]
diff --git a/openml/estimation_procedures/estimation_procedure.py b/openml/estimation_procedures/estimation_procedure.py
@@ -0,0 +1,50 @@
+# License: BSD 3-Clause
+from __future__ import annotations
+
+from dataclasses import asdict, dataclass
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from openml.tasks import TaskType
+
+
+@dataclass
+class OpenMLEstimationProcedure:
+    """
+    Contains the meta-information about a single estimation procedure,
+    as returned by the estimationprocedure/list endpoint
+
+    Parameters
+    ----------
+    id : int
+        ID of estimation procedure
+    task_type_id : TaskType
+        Associated task type
+    name : str
+        Name of estimation procedure
+    type : str
+        Type of estimation procedure
+    """
+
+    id: int
+    task_type_id: TaskType
+    name: str
+    type: str
+
+    def _to_dict(self) -> dict:
+        return asdict(self)
+
+    def __repr__(self) -> str:
+        header = "OpenML Estimation Procedure"
+        header = f"{header}\n{'=' * len(header)}\n"
+
+        fields = {
+            "ID": self.id,
+            "Name": self.name,
+            "Type": self.type,
+            "Task Type": self.task_type_id,
+        }
+        longest_field_name_length = max(len(name) for name in fields)
+        field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}"
+        body = "\n".join(field_line_format.format(name, value) for name, value in fields.items())
+        return header + body
diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py
@@ -9,7 +9,6 @@
 
 import numpy as np
 import pandas as pd
-import xmltodict
 
 import openml
 import openml._api_calls
@@ -167,24 +166,8 @@ def list_estimation_procedures() -> list[str]:
     -------
     list
     """
-    api_call = "estimationprocedure/list"
-    xml_string = openml._api_calls._perform_api_call(api_call, "get")
-    api_results = xmltodict.parse(xml_string)
-
-    # Minimalistic check if the XML is useful
-    if "oml:estimationprocedures" not in api_results:
-        raise ValueError('Error in return XML, does not contain "oml:estimationprocedures"')
-
-    if "oml:estimationprocedure" not in api_results["oml:estimationprocedures"]:
-        raise ValueError('Error in return XML, does not contain "oml:estimationprocedure"')
-
-    if not isinstance(api_results["oml:estimationprocedures"]["oml:estimationprocedure"], list):
-        raise TypeError('Error in return XML, does not contain "oml:estimationprocedure" as a list')
-
-    return [
-        prod["oml:name"]
-        for prod in api_results["oml:estimationprocedures"]["oml:estimationprocedure"]
-    ]
+    result = openml._backend.estimation_procedure.list()
+    return [i.name for i in result]
 
 
 def list_evaluations_setups(
diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
@@ -1,14 +1,18 @@
 # License: BSD 3-Clause
 from __future__ import annotations
 
+import os
+import re
 import warnings
 from functools import partial
 from typing import TYPE_CHECKING, Any
 
 import pandas as pd
 
 import openml.utils
+from openml._api.resources.task import _create_task_from_xml
 from openml.datasets import get_dataset
+from openml.exceptions import OpenMLCacheException
 
 from .task import (
     OpenMLClassificationTask,
@@ -23,6 +27,63 @@
     from .task import (
         OpenMLTask,
     )
+TASKS_CACHE_DIR_NAME = "tasks"
+
+
+def _get_cached_tasks() -> dict[int, OpenMLTask]:
+    """Return a dict of all the tasks which are cached locally.
+
+    Returns
+    -------
+    tasks : dict
+        A dict mapping task id to the cached task. Each task is an instance
+        of OpenMLTask.
+    """
+    task_cache_dir = openml.utils._create_cache_directory(TASKS_CACHE_DIR_NAME)
+    directory_content = os.listdir(task_cache_dir)  # noqa: PTH208
+    directory_content.sort()
+
+    # Only purely numeric entries are task ids; fullmatch (unlike match with
+    # "[0-9]*", which accepts every name) keeps int() from failing on others.
+    tids = (int(tid) for tid in directory_content if re.fullmatch(r"[0-9]+", tid))
+    return {tid: _get_cached_task(tid) for tid in tids}
+
+
+def _get_cached_task(tid: int) -> OpenMLTask:
+    """Return a cached task based on the given id.
+
+    Parameters
+    ----------
+    tid : int
+        Id of the task.
+
+    Returns
+    -------
+    OpenMLTask
+    """
+    tid_cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, tid)
+
+    task_xml_path = tid_cache_dir / "task.xml"
+    try:
+        with task_xml_path.open(encoding="utf8") as fh:
+            return _create_task_from_xml(fh.read())
+    except OSError as e:
+        openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, tid_cache_dir)
+        raise OpenMLCacheException(f"Task file for tid {tid} not cached") from e
+
+
+def _get_estimation_procedure_list() -> list[dict[str, Any]]:
+    """Return a list of all estimation procedures which are on OpenML.
+
+    Returns
+    -------
+    procedures : list
+        A list of all estimation procedures. Every procedure is represented by
+        a dictionary containing the following information: id, task_type_id,
+        name, type.
+    """
+    result = openml._backend.estimation_procedure.list()
+    return [i._to_dict() for i in result]
 
 
 def list_tasks(  # noqa: PLR0913
diff --git a/tests/test_api/test_estimation_procedure.py b/tests/test_api/test_estimation_procedure.py
@@ -0,0 +1,32 @@
+# License: BSD 3-Clause  
+from __future__ import annotations  
+  
+import pytest    
+from openml._api import EstimationProcedureV1API, EstimationProcedureV2API
+from openml.exceptions import OpenMLNotSupportedError
+from openml.estimation_procedures import OpenMLEstimationProcedure
+
+
+@pytest.fixture
+def estimation_procedure_v1(http_client_v1, minio_client) -> EstimationProcedureV1API:
+    return EstimationProcedureV1API(http=http_client_v1, minio=minio_client)
+
+
+@pytest.fixture
+def estimation_procedure_v2(http_client_v2, minio_client) -> EstimationProcedureV2API:
+    return EstimationProcedureV2API(http=http_client_v2, minio=minio_client)
+
+
+@pytest.mark.test_server()
+def test_v1_list(estimation_procedure_v1):
+    details = estimation_procedure_v1.list()
+    
+    assert isinstance(details, list)
+    assert len(details) > 0
+    assert all(isinstance(d, OpenMLEstimationProcedure) for d in details)
+
+
+@pytest.mark.test_server()
+def test_v2_list(estimation_procedure_v2):
+    with pytest.raises(OpenMLNotSupportedError):
+        estimation_procedure_v2.list()