From 9fde49a3e7cf4ebb8dd6462560f85f7f1b52eac5 Mon Sep 17 00:00:00 2001
From: Vasileios Karakasis <vkarakasis@nvidia.com>
Date: Tue, 10 Feb 2026 11:34:53 -0800
Subject: [PATCH 1/3] Move test prefix logic in the metaclass

---
 reframe/core/meta.py       |  25 +++++++
 reframe/core/pipeline.py   |  42 +++--------
 unittests/test_pipeline.py | 138 +++++++++++++++++++------------------
 unittests/test_policies.py |   4 +-
 unittests/utility.py       |  13 ----
 5 files changed, 107 insertions(+), 115 deletions(-)

diff --git a/reframe/core/meta.py b/reframe/core/meta.py
index f64d823eb0..5e52426b5e 100644
--- a/reframe/core/meta.py
+++ b/reframe/core/meta.py
@@ -9,6 +9,7 @@
 
 import functools
 import inspect
+import os
 import types
 import collections
 
@@ -19,6 +20,7 @@
 import reframe.core.fixtures as fixtures
 import reframe.core.hooks as hooks
 import reframe.utility as utils
+import reframe.utility.osext as osext
 
 from reframe.core.exceptions import ReframeSyntaxError
 
@@ -427,6 +429,29 @@ def __init__(cls, name, bases, namespace, **kwargs):
                         with_code_context=True
                     )
 
+        # Set the test prefix
+        #
+        # First check if the current test pins the prefix and store this, so
+        # as to reuse in derived tests
+        curr_prefix = os.path.abspath(
+            os.path.dirname(inspect.getfile(cls))
+        )
+        if kwargs.pop('pin_prefix', False):
+            cls._rfm_pinned_prefix = curr_prefix
+
+        try:
+            prefix = kwargs['custom_prefix']
+        except KeyError:
+            if osext.is_interactive():
+                prefix = os.getcwd()
+            else:
+                try:
+                    prefix = cls._rfm_pinned_prefix
+                except AttributeError:
+                    prefix = curr_prefix
+
+        cls._rfm_prefix = prefix
+
     def __call__(cls, *args, **kwargs):
         '''Inject test builtins during object construction.
 
diff --git a/reframe/core/pipeline.py b/reframe/core/pipeline.py
index e77fd23673..b29fb07125 100644
--- a/reframe/core/pipeline.py
+++ b/reframe/core/pipeline.py
@@ -156,8 +156,8 @@ class RegressionMixin(RegressionTestPlugin):
     '''
 
     @classmethod
-    def __init_subclass__(cls, **kwargs):
-        super().__init_subclass__(**kwargs)
+    def __init_subclass__(cls):
+        super().__init_subclass__()
         user_deprecation_warning(
             '`RegressionMixin` is deprecated; '
             'please inherit from `RegressionTestPlugin` instead'
@@ -1131,24 +1131,10 @@ def pipeline_hooks(cls):
     def __new__(cls, *args, **kwargs):
         obj = super().__new__(cls)
 
-        # Determine the prefix
-        try:
-            prefix = cls._rfm_custom_prefix
-        except AttributeError:
-            if osext.is_interactive():
-                prefix = os.getcwd()
-            else:
-                try:
-                    prefix = cls._rfm_pinned_prefix
-                except AttributeError:
-                    prefix = os.path.abspath(
-                        os.path.dirname(inspect.getfile(cls))
-                    )
-
         # Prepare initialization of test defaults (variables and parameters are
         # injected after __new__ has returned, so we schedule this function
         # call as a pre-init hook).
-        obj.__deferred_rfm_init = obj.__rfm_init__(prefix)
+        obj.__deferred_rfm_init = obj.__rfm_init__()
 
         # Build pipeline hook registry and add the pre-init hook
         cls._rfm_pipeline_hooks = cls._process_hook_registry()
@@ -1173,9 +1159,9 @@ def __init__(self):
         pass
 
     @classmethod
-    def __init_subclass__(cls, *, special=False, pin_prefix=False,
+    def __init_subclass__(cls, *, special=False,
                           require_version=None, **kwargs):
-        super().__init_subclass__(**kwargs)
+        super().__init_subclass__()
         cls._rfm_override_final = special
 
         if require_version:
@@ -1183,21 +1169,13 @@ def __init_subclass__(cls, *, special=False, pin_prefix=False,
         elif not hasattr(cls, '_rfm_required_version'):
             cls._rfm_required_version = []
 
-        # Insert the prefix to pin the test to if the test lives in a test
-        # library with resources in it.
-        if pin_prefix:
-            cls._rfm_pinned_prefix = os.path.abspath(
-                os.path.dirname(inspect.getfile(cls))
-            )
-
     @deferrable
-    def __rfm_init__(self, prefix=None):
+    def __rfm_init__(self):
         self._perfvalues = {}
 
         # Static directories of the regression check
-        self._prefix = os.path.abspath(prefix)
         if (self.sourcesdir == 'src' and
-            not os.path.isdir(os.path.join(self._prefix, self.sourcesdir))):
+            not os.path.isdir(os.path.join(self._rfm_prefix, self.sourcesdir))):
             self.sourcesdir = None
 
         # Runtime information of the test
@@ -1568,7 +1546,7 @@ def prefix(self):
 
         :type: :class:`str`.
         '''
-        return self._prefix
+        return self._rfm_prefix
 
     @loggable
     @property
@@ -1997,7 +1975,7 @@ def compile(self):
                 if osext.is_url(self.sourcesdir):
                     self._clone_to_stagedir(self.sourcesdir)
                 else:
-                    self._copy_to_stagedir(os.path.join(self._prefix,
+                    self._copy_to_stagedir(os.path.join(self._rfm_prefix,
                                                         self.sourcesdir))
 
         # Set executable (only if hasn't been provided)
@@ -2878,7 +2856,7 @@ def run(self):
             if osext.is_url(self.sourcesdir):
                 self._clone_to_stagedir(self.sourcesdir)
             else:
-                self._copy_to_stagedir(os.path.join(self._prefix,
+                self._copy_to_stagedir(os.path.join(self._rfm_prefix,
                                                     self.sourcesdir))
 
         super().run()
diff --git a/unittests/test_pipeline.py b/unittests/test_pipeline.py
index 694771bba2..cf73fc4562 100644
--- a/unittests/test_pipeline.py
+++ b/unittests/test_pipeline.py
@@ -14,6 +14,7 @@
 import reframe.utility.osext as osext
 import reframe.utility.sanity as sn
 import unittests.utility as test_util
+from pathlib import Path
 
 from reframe.core.exceptions import (BuildError,
                                      ExpectedFailureError,
@@ -226,8 +227,8 @@ def validate(self):
 
 
 def test_run_only_set_sanity_in_a_hook(local_exec_ctx):
-    @test_util.custom_prefix('unittests/resources/checks')
-    class MyTest(rfm.RunOnlyRegressionTest):
+    class MyTest(rfm.RunOnlyRegressionTest,
+                 custom_prefix='unittests/resources/checks'):
         executable = './hello.sh'
         executable_opts = ['Hello, World!']
         local = True
@@ -244,8 +245,8 @@ def set_sanity(self):
 
 
 def test_run_only_decorated_sanity(local_exec_ctx):
-    @test_util.custom_prefix('unittests/resources/checks')
-    class MyTest(rfm.RunOnlyRegressionTest):
+    class MyTest(rfm.RunOnlyRegressionTest,
+                 custom_prefix='unittests/resources/checks'):
         executable = './hello.sh'
         executable_opts = ['Hello, World!']
         local = True
@@ -267,8 +268,7 @@ class MyOtherTest(MyTest):
 
 
 def test_run_only_no_srcdir(local_exec_ctx):
-    @test_util.custom_prefix('foo/bar/')
-    class MyTest(rfm.RunOnlyRegressionTest):
+    class MyTest(rfm.RunOnlyRegressionTest, custom_prefix='foo/bar/'):
         valid_systems = ['*']
         valid_prog_environs = ['*']
         executable = 'echo'
@@ -280,8 +280,7 @@ class MyTest(rfm.RunOnlyRegressionTest):
 
 
 def test_run_only_srcdir_set_to_none(local_exec_ctx):
-    @test_util.custom_prefix('foo/bar/')
-    class MyTest(rfm.RunOnlyRegressionTest):
+    class MyTest(rfm.RunOnlyRegressionTest, custom_prefix='foo/bar/'):
         executable = 'echo'
         valid_prog_environs = ['*']
         valid_systems = ['*']
@@ -303,8 +302,8 @@ class MyTest(rfm.RunOnlyRegressionTest):
 
 
 def test_compile_only_failure(local_exec_ctx):
-    @test_util.custom_prefix('unittests/resources/checks')
-    class MyTest(rfm.CompileOnlyRegressionTest):
+    class MyTest(rfm.CompileOnlyRegressionTest,
+                 custom_prefix='unittests/resources/checks'):
         sourcepath = 'compiler_failure.c'
         valid_prog_environs = ['*']
         valid_systems = ['*']
@@ -317,8 +316,8 @@ class MyTest(rfm.CompileOnlyRegressionTest):
 
 
 def test_compile_only_warning(local_exec_ctx):
-    @test_util.custom_prefix('unittests/resources/checks')
-    class MyTest(rfm.CompileOnlyRegressionTest):
+    class MyTest(rfm.CompileOnlyRegressionTest,
+                 custom_prefix='unittests/resources/checks'):
         valid_prog_environs = ['*']
         valid_systems = ['*']
         build_system = 'SingleSource'
@@ -341,7 +340,7 @@ class MyTest(pinnedtest):
 
     pinned = MyTest()
     expected_prefix = os.path.join(os.getcwd(), 'unittests/resources/checks')
-    assert pinned._prefix == expected_prefix
+    assert pinned.prefix == expected_prefix
 
 
 def test_valid_systems_syntax(hellotest):
@@ -742,8 +741,8 @@ def _assert_supported(valid_systems, valid_prog_environs,
 
 
 def test_sourcesdir_none(local_exec_ctx):
-    @test_util.custom_prefix('unittests/resources/checks')
-    class MyTest(rfm.RegressionTest):
+    class MyTest(rfm.RegressionTest,
+                 custom_prefix='unittests/resources/checks'):
         sourcesdir = None
         valid_prog_environs = ['*']
         valid_systems = ['*']
@@ -753,8 +752,8 @@ class MyTest(rfm.RegressionTest):
 
 
 def test_sourcesdir_build_system(local_exec_ctx):
-    @test_util.custom_prefix('unittests/resources/checks')
-    class MyTest(rfm.RegressionTest):
+    class MyTest(rfm.RegressionTest,
+                 custom_prefix='unittests/resources/checks'):
         build_system = 'Make'
         sourcepath = 'code'
         executable = './code/hello'
@@ -772,8 +771,8 @@ def test_sourcesdir_git(local_exec_ctx):
     if test_util.OFFLINE:
         pytest.skip('offline tests requested')
 
-    @test_util.custom_prefix('unittests/resources/checks')
-    class MyTest(rfm.RunOnlyRegressionTest):
+    class MyTest(rfm.RunOnlyRegressionTest,
+                 custom_prefix='unittests/resources/checks'):
         sourcesdir = 'https://github.com/reframe-hpc/ci-hello-world.git'
         executable = 'true'
         valid_systems = ['*']
@@ -789,8 +788,8 @@ def validate(self):
 
 
 def test_sourcesdir_none_generated_sources(local_exec_ctx):
-    @test_util.custom_prefix('unittests/resources/checks')
-    class MyTest(rfm.RegressionTest):
+    class MyTest(rfm.RegressionTest,
+                 custom_prefix='unittests/resources/checks'):
         sourcesdir = None
         prebuild_cmds = [
             "printf '#include <stdio.h>\\n int main(){ "
@@ -809,8 +808,8 @@ def validate(self):
 
 
 def test_sourcesdir_none_compile_only(local_exec_ctx):
-    @test_util.custom_prefix('unittests/resources/checks')
-    class MyTest(rfm.CompileOnlyRegressionTest):
+    class MyTest(rfm.CompileOnlyRegressionTest,
+                 custom_prefix='unittests/resources/checks'):
         sourcesdir = None
         valid_prog_environs = ['*']
         valid_systems = ['*']
@@ -820,8 +819,8 @@ class MyTest(rfm.CompileOnlyRegressionTest):
 
 
 def test_sourcesdir_none_run_only(local_exec_ctx):
-    @test_util.custom_prefix('unittests/resources/checks')
-    class MyTest(rfm.RunOnlyRegressionTest):
+    class MyTest(rfm.RunOnlyRegressionTest,
+                 custom_prefix='unittests/resources/checks'):
         sourcesdir = None
         executable = 'echo'
         executable_opts = ['Hello, World!']
@@ -836,8 +835,8 @@ def validate(self):
 
 
 def test_sourcepath_abs(local_exec_ctx):
-    @test_util.custom_prefix('unittests/resources/checks')
-    class MyTest(rfm.CompileOnlyRegressionTest):
+    class MyTest(rfm.CompileOnlyRegressionTest,
+                 custom_prefix='unittests/resources/checks'):
         valid_prog_environs = ['*']
         valid_systems = ['*']
 
@@ -849,8 +848,8 @@ class MyTest(rfm.CompileOnlyRegressionTest):
 
 
 def test_sourcepath_upref(local_exec_ctx):
-    @test_util.custom_prefix('unittests/resources/checks')
-    class MyTest(rfm.CompileOnlyRegressionTest):
+    class MyTest(rfm.CompileOnlyRegressionTest,
+                 custom_prefix='unittests/resources/checks'):
         valid_prog_environs = ['*']
         valid_systems = ['*']
 
@@ -862,8 +861,8 @@ class MyTest(rfm.CompileOnlyRegressionTest):
 
 
 def test_sourcepath_non_existent(local_exec_ctx):
-    @test_util.custom_prefix('unittests/resources/checks')
-    class MyTest(rfm.CompileOnlyRegressionTest):
+    class MyTest(rfm.CompileOnlyRegressionTest,
+                 custom_prefix='unittests/resources/checks'):
         valid_prog_environs = ['*']
         valid_systems = ['*']
 
@@ -876,8 +875,7 @@ class MyTest(rfm.CompileOnlyRegressionTest):
 
 
 def test_extra_resources(HelloTest, testsys_exec_ctx):
-    @test_util.custom_prefix('unittests/resources/checks')
-    class MyTest(HelloTest):
+    class MyTest(HelloTest, custom_prefix='unittests/resources/checks'):
         local = True
 
         @run_after('setup')
@@ -958,8 +956,7 @@ def __init__(self):
 
 
 def test_setup_hooks(HelloTest, local_exec_ctx):
-    @test_util.custom_prefix('unittests/resources/checks')
-    class MyTest(HelloTest):
+    class MyTest(HelloTest, custom_prefix='unittests/resources/checks'):
         count = variable(int, value=0)
 
         @run_before('setup')
@@ -978,8 +975,7 @@ def postfoo(self):
 
 
 def test_compile_hooks(HelloTest, local_exec_ctx):
-    @test_util.custom_prefix('unittests/resources/checks')
-    class MyTest(HelloTest):
+    class MyTest(HelloTest, custom_prefix='unittests/resources/checks'):
         count = variable(int, value=0)
 
         @run_before('compile')
@@ -999,8 +995,7 @@ def check_executable(self):
 
 
 def test_run_hooks(HelloTest, local_exec_ctx):
-    @test_util.custom_prefix('unittests/resources/checks')
-    class MyTest(HelloTest):
+    class MyTest(HelloTest, custom_prefix='unittests/resources/checks'):
         @run_before('run')
         def setflags(self):
             self.postrun_cmds = ['echo hello > greetings.txt']
@@ -1016,8 +1011,7 @@ def check_executable(self):
 
 
 def test_multiple_hooks(HelloTest, local_exec_ctx):
-    @test_util.custom_prefix('unittests/resources/checks')
-    class MyTest(HelloTest):
+    class MyTest(HelloTest, custom_prefix='unittests/resources/checks'):
         var = variable(int, value=0)
 
         @run_after('setup')
@@ -1038,8 +1032,7 @@ def z(self):
 
 
 def test_stacked_hooks(HelloTest, local_exec_ctx):
-    @test_util.custom_prefix('unittests/resources/checks')
-    class MyTest(HelloTest):
+    class MyTest(HelloTest, custom_prefix='unittests/resources/checks'):
         var = variable(int, value=0)
 
         @run_before('setup')
@@ -1060,8 +1053,7 @@ class MyTest(rfm.RunOnlyRegressionTest, HelloTest):
 
 
 def test_inherited_hooks(HelloTest, local_exec_ctx):
-    @test_util.custom_prefix('unittests/resources/checks')
-    class BaseTest(HelloTest):
+    class BaseTest(HelloTest, custom_prefix='unittests/resources/checks'):
         var = variable(int, value=0)
 
         @run_after('setup')
@@ -1145,8 +1137,7 @@ def test_inherited_hooks_order(weird_mro_test, local_exec_ctx):
 
 
 def test_inherited_hooks_from_instantiated_tests(HelloTest, local_exec_ctx):
-    @test_util.custom_prefix('unittests/resources/checks')
-    class T0(HelloTest):
+    class T0(HelloTest, custom_prefix='unittests/resources/checks'):
         var = variable(int, value=0)
 
         @run_after('setup')
@@ -1170,8 +1161,7 @@ def y(self):
 
 
 def test_overriden_hooks(HelloTest, local_exec_ctx):
-    @test_util.custom_prefix('unittests/resources/checks')
-    class BaseTest(HelloTest):
+    class BaseTest(HelloTest, custom_prefix='unittests/resources/checks'):
         var = variable(int, value=0)
         foo = variable(int, value=0)
 
@@ -1200,8 +1190,7 @@ def y(self):
 
 
 def test_overriden_hook_different_stages(HelloTest, local_exec_ctx):
-    @test_util.custom_prefix('unittests/resources/checks')
-    class MyTest(HelloTest):
+    class MyTest(HelloTest, custom_prefix='unittests/resources/checks'):
         @run_after('init')
         def foo(self):
             pass
@@ -1215,8 +1204,8 @@ def foo(self):
 
 
 def test_overriden_hook_exec_order():
-    @test_util.custom_prefix('unittests/resources/checks')
-    class X(rfm.RunOnlyRegressionTest):
+    class X(rfm.RunOnlyRegressionTest,
+            custom_prefix='unittests/resources/checks'):
         @run_before('run')
         def foo(self):
             pass
@@ -1298,8 +1287,7 @@ def bar(self):
 
 
 def test_disabled_hooks(HelloTest, local_exec_ctx):
-    @test_util.custom_prefix('unittests/resources/checks')
-    class BaseTest(HelloTest):
+    class BaseTest(HelloTest, custom_prefix='unittests/resources/checks'):
         var = variable(int, value=0)
         foo = variable(int, value=0)
 
@@ -1327,12 +1315,10 @@ def test_require_deps(HelloTest, local_exec_ctx):
     import reframe.frontend.dependencies as dependencies
     import reframe.frontend.executors as executors
 
-    @test_util.custom_prefix('unittests/resources/checks')
-    class T0(HelloTest):
+    class T0(HelloTest, custom_prefix='unittests/resources/checks'):
         x = variable(int, value=1)
 
-    @test_util.custom_prefix('unittests/resources/checks')
-    class T1(HelloTest):
+    class T1(HelloTest, custom_prefix='unittests/resources/checks'):
         @run_after('init')
         def setdeps(self):
             self.depends_on('T0')
@@ -1363,8 +1349,8 @@ def setz(self, T0):
 def test_trap_job_errors_without_sanity_patterns(local_exec_ctx):
     rt.runtime().site_config.add_sticky_option('general/trap_job_errors', True)
 
-    @test_util.custom_prefix('unittests/resources/checks')
-    class MyTest(rfm.RunOnlyRegressionTest):
+    class MyTest(rfm.RunOnlyRegressionTest,
+                 custom_prefix='unittests/resources/checks'):
         valid_prog_environs = ['*']
         valid_systems = ['*']
         executable = 'exit 10'
@@ -1376,8 +1362,8 @@ class MyTest(rfm.RunOnlyRegressionTest):
 def test_trap_job_errors_with_sanity_patterns(local_exec_ctx):
     rt.runtime().site_config.add_sticky_option('general/trap_job_errors', True)
 
-    @test_util.custom_prefix('unittests/resources/checks')
-    class MyTest(rfm.RunOnlyRegressionTest):
+    class MyTest(rfm.RunOnlyRegressionTest,
+                 custom_prefix='unittests/resources/checks'):
         valid_prog_environs = ['*']
         valid_systems = ['*']
         prerun_cmds = ['echo hello']
@@ -1419,9 +1405,25 @@ def sanity_file(tmp_path):
 # should not change to the `@performance_function` syntax`
 
 @pytest.fixture
-def dummytest(testsys_exec_ctx, perf_file, sanity_file):
-    class MyTest(rfm.RunOnlyRegressionTest):
-        def __init__(self):
+def dummytest(testsys_exec_ctx, perf_file, sanity_file, tmp_path):
+    class MyTest(rfm.RunOnlyRegressionTest,
+                 custom_prefix=str(tmp_path)):
+        perf_file = variable(str, Path)
+        reference = {
+            'testsys': {
+                'value1': (1.4, -0.1, 0.1, None),
+                'value2': (1.7, -0.1, 0.1, None),
+            },
+            'testsys:gpu': {
+                'value3': (3.1, -0.1, 0.1, None),
+            }
+        }
+
+        # Since we are in a fixture definition, `perf_file` and `sanity_file`
+        # are still unresolved fixture objects, so we cannot directly use them
+        # in the class body. Instead, we set them in a post-init hook.
+        @run_after('init')
+        def set_perf_file(self):
             self.perf_file = perf_file
             self.sourcesdir = None
             self.reference = {
@@ -1911,8 +1913,8 @@ def reftuple(status):
 @pytest.fixture
 def container_test(tmp_path):
     def _container_test(platform, image):
-        @test_util.custom_prefix(tmp_path)
-        class ContainerTest(rfm.RunOnlyRegressionTest):
+        class ContainerTest(rfm.RunOnlyRegressionTest,
+                            custom_prefix=tmp_path):
             valid_prog_environs = ['*']
             valid_systems = ['*']
             prerun_cmds = ['touch foo']
diff --git a/unittests/test_policies.py b/unittests/test_policies.py
index d3f40b270e..a3c1601324 100644
--- a/unittests/test_policies.py
+++ b/unittests/test_policies.py
@@ -65,8 +65,8 @@ def make_cases_for_skipping(request):
     import reframe.utility.sanity as sn
 
     def _make_cases():
-        @test_util.custom_prefix('unittests/resources/checks')
-        class _T0(rfm.RegressionTest):
+        class _T0(rfm.RegressionTest,
+                  custom_prefix='unittests/resources/checks'):
             valid_systems = ['*']
             valid_prog_environs = ['*']
             sourcepath = 'hello.c'
diff --git a/unittests/utility.py b/unittests/utility.py
index f51a21e576..c1c62ff179 100644
--- a/unittests/utility.py
+++ b/unittests/utility.py
@@ -86,19 +86,6 @@ def has_sane_modules_system():
                           (modules.NoModImpl, modules.SpackImpl))
 
 
-def custom_prefix(prefix):
-    '''Assign a custom prefix to a test.
-
-    This is useful in unit tests when we want to create tests on-the-fly and
-    associate them with existing resources.'''
-
-    def _set_prefix(cls):
-        cls._rfm_custom_prefix = prefix
-        return cls
-
-    return _set_prefix
-
-
 def dispatch(argname, suffix=None):
     '''Dispatch call to the decorated function to another one based on the type
     of the keyword argument ``argname``.

From b0ac4111579ee7320db0503e6196458aab7319a9 Mon Sep 17 00:00:00 2001
From: Vasileios Karakasis <vkarakasis@nvidia.com>
Date: Fri, 26 Dec 2025 11:40:19 +0100
Subject: [PATCH 2/3] Support arbitrarily indexed references

---
 docs/config_reference.rst                     |  17 +-
 docs/howto.rst                                |  87 +++
 docs/manpage.rst                              |  19 +
 docs/tutorial.rst                             |   3 +
 examples/howto/reference_index.py             |  94 +++
 examples/tutorial/stream/stream_parameters.py |   4 +-
 reframe/core/exceptions.py                    |  10 +
 reframe/core/fields.py                        |   8 +-
 reframe/core/pipeline.py                      | 573 +++++++++++++++++-
 reframe/frontend/cli.py                       |   7 +
 reframe/utility/__init__.py                   |   4 +-
 reframe/utility/jsonext.py                    |   5 +-
 .../resources/checks_unlisted/xfailures.py    |   8 +-
 unittests/test_pipeline.py                    | 380 +++++++++++-
 14 files changed, 1156 insertions(+), 63 deletions(-)
 create mode 100644 examples/howto/reference_index.py

diff --git a/docs/config_reference.rst b/docs/config_reference.rst
index 2aaa042242..ec0d34fb67 100644
--- a/docs/config_reference.rst
+++ b/docs/config_reference.rst
@@ -2064,6 +2064,21 @@ General Configuration
    .. versionadded:: 3.7.0
 
 
+.. py:attribute:: general.reference_prefix
+
+   :required: No
+   :default: :obj:`None`
+
+   Directory prefix for resolving paths of external reference files.
+
+   When a test's :attr:`~reframe.core.pipeline.RegressionTest.reference`
+   attribute uses the ``$ref`` key to load references from an :ref:`external
+   reference file <external-references>`, that file is looked up
+   under this prefix. When not set, the test's prefix directory is used.
+
+   .. versionadded:: 4.10
+
+
 .. py:attribute:: general.ignore_check_conflicts
 
    :required: No
@@ -2534,4 +2549,4 @@ This is the builtin configuration that ReFrame always loads.
 
 .. seealso::
 
-   See also how configuration files are :ref:`loaded <manpage-configuration>` and how you can specify them with the :option:`--config-file` option.
\ No newline at end of file
+   See also how configuration files are :ref:`loaded <manpage-configuration>` and how you can specify them with the :option:`--config-file` option.
diff --git a/docs/howto.rst b/docs/howto.rst
index 9e3acece64..f1ef134b6c 100644
--- a/docs/howto.rst
+++ b/docs/howto.rst
@@ -314,6 +314,93 @@ In this case, you could set the :attr:`build_system` to ``'CustomBuild'`` and su
     You should use this build system with caution, because environment management, reproducibility and any potential side effects are all controlled by the custom build system.
 
 
+.. _howto-reference-index:
+
+Custom performance reference indexing
+=====================================
+
+.. versionadded:: 4.10
+
+By default, the :attr:`~reframe.core.pipeline.RegressionTest.reference` attribute is indexed by the system or the system/partition combination.
+However, it is often the case that the reference values depend on test variables and/or parameters.
+ReFrame allows you to define a custom index for the reference dictionary by using the special ``$index`` key.
+Here is a reference definition for the stream benchmark tutorial example, where references are defined per number of threads and thread placement:
+
+.. literalinclude:: ../examples/howto/reference_index.py
+   :lines: 5-
+
+Special keys are also supported to allow users to index their references by the system, environment, processor and device details.
+For example, we could define different references for different environments by using the ``$environ`` special key as follows:
+
+.. code-block:: python
+
+    reference = {
+        '$index': ('$environ', 'num_threads'),
+        'gnu': {
+            '1': {
+                'copy_bw': (10000, -0.2, 0.2, 'MB/s'),
+            },
+            '2': {
+                'copy_bw': (20000, -0.2, 0.2, 'MB/s'),
+            },
+            '4': {
+                'copy_bw': (40000, -0.2, 0.2, 'MB/s'),
+            },
+        },
+        'clang': {
+            '1': {
+                'copy_bw': (10000, -0.2, 0.2, 'MB/s'),
+            },
+            '2': {
+                'copy_bw': (20000, -0.2, 0.2, 'MB/s'),
+            },
+            '4': {
+                'copy_bw': (40000, -0.2, 0.2, 'MB/s'),
+            },
+        }
+    }
+
+External references
+-------------------
+
+.. versionadded:: 4.10
+
+Users can also keep test references in a separate YAML file instead of in the test class.
+To achieve this, the special ``$ref`` key must be used in the :attr:`reference` dictionary.
+
+.. code-block:: python
+
+    reference = {'$ref': 'references/stream.yaml'}
+
+By default, reference files are resolved relative to the test's :attr:`prefix` directory, but this can be controlled by the :attr:`~config.general.reference_prefix` configuration option or the :envvar:`RFM_REFERENCE_PREFIX` environment variable.
+
+The reference file can contain references for multiple tests, and its general structure is a one-to-one match to the inline reference dictionary.
+Here is what the reference file for the previous example would look like:
+
+.. code-block:: yaml
+
+    stream_test:
+      $index: ['$environ', 'num_threads']
+      gnu:
+         1:
+            'copy_bw': [10000, -0.2, 0.2, 'MB/s']
+         2:
+            'copy_bw': [20000, -0.2, 0.2, 'MB/s']
+         4:
+            'copy_bw': [40000, -0.2, 0.2, 'MB/s']
+      clang:
+         1:
+            'copy_bw': [10000, -0.2, 0.2, 'MB/s']
+         2:
+            'copy_bw': [20000, -0.2, 0.2, 'MB/s']
+         4:
+            'copy_bw': [40000, -0.2, 0.2, 'MB/s']
+
+
+.. seealso::
+   Check the API docs of the :attr:`~reframe.core.pipeline.RegressionTest.reference` test attribute for all the details on how to define references.
+
+
 .. _working-with-environment-modules:
 
 Working with environment modules
diff --git a/docs/manpage.rst b/docs/manpage.rst
index 30a28e4885..2a66943faf 100644
--- a/docs/manpage.rst
+++ b/docs/manpage.rst
@@ -2310,6 +2310,25 @@ Whenever an environment variable is associated with a configuration option, its
    .. versionadded:: 3.7.0
 
 
+.. envvar:: RFM_REFERENCE_PREFIX
+
+   Directory prefix for resolving paths of external reference files.
+
+   When a test's reference attribute uses the ``$ref`` key to load references
+   from a file, that file is looked up under this prefix. When not set, the
+   test's prefix directory is used.
+
+   .. table::
+      :align: left
+
+      ================================== ==================
+      Associated command line option     N/A
+      Associated configuration parameter :attr:`~config.general.reference_prefix`
+      ================================== ==================
+
+   .. versionadded:: 4.10
+
+
 .. envvar:: RFM_REPORT_FILE
 
    The file where ReFrame will store its report.
diff --git a/docs/tutorial.rst b/docs/tutorial.rst
index a6a04d43eb..2c67a372be 100644
--- a/docs/tutorial.rst
+++ b/docs/tutorial.rst
@@ -291,6 +291,9 @@ The lower and upper thresholds are deviations from the target reference expresse
 In our example, we allow the ``copy_bw`` to be 10% lower than the target reference and no more than 30% higher.
 Sometimes, especially in microbenchmarks, it is a good practice to set an upper threshold to denote the absolute maximum that cannot be exceeded.
 
+It is also possible to define a custom index for the reference dictionary that is not based on the system and/or partition combination.
+Check the :ref:`howto-reference-index` in the :doc:`howto` for more details.
+
 
 Dry-run mode
 ------------
diff --git a/examples/howto/reference_index.py b/examples/howto/reference_index.py
new file mode 100644
index 0000000000..d47c2dec1f
--- /dev/null
+++ b/examples/howto/reference_index.py
@@ -0,0 +1,94 @@
+# Copyright 2016-2026 Swiss National Supercomputing Centre (CSCS/ETH Zurich)
+# ReFrame Project Developers. See the top-level LICENSE file for details.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+import os
+import reframe as rfm
+import reframe.utility.sanity as sn
+
+
+class build_stream(rfm.CompileOnlyRegressionTest):
+    build_system = 'SingleSource'
+    sourcepath = 'stream.c'
+    executable = './stream.x'
+    array_size = variable(int, value=0)
+
+    @run_before('compile')
+    def prepare_build(self):
+        omp_flag = self.current_environ.extras.get('omp_flag')
+        self.build_system.cflags = ['-O3', omp_flag]
+        if self.array_size:
+            self.build_system.cppflags = [f'-DARRAY_SIZE={self.array_size}']
+
+
+@rfm.simple_test
+class stream_test(rfm.RunOnlyRegressionTest):
+    valid_systems = ['*']
+    valid_prog_environs = ['+openmp']
+    stream_binary = fixture(build_stream, scope='environment')
+    num_threads = parameter([1, 2, 4, 8])
+    thread_placement = parameter(['close', 'spread'])
+    reference = {
+        '$index': ('num_threads', 'thread_placement'),
+        1: {
+            'close': {
+                'copy_bw': (10000, -0.2, 0.2, 'MB/s'),
+                'triad_bw': (8000, -0.2, 0.2, 'MB/s'),
+            },
+            'spread': {
+                'copy_bw': (10500, -0.2, 0.2, 'MB/s'),
+                'triad_bw': (8500, -0.2, 0.2, 'MB/s'),
+            },
+        },
+        2: {
+            'close': {
+                'copy_bw': (18000, -0.2, 0.2, 'MB/s'),
+                'triad_bw': (16000, -0.2, 0.2, 'MB/s'),
+            },
+            'spread': {
+                'copy_bw': (18500, -0.2, 0.2, 'MB/s'),
+                'triad_bw': (16500, -0.2, 0.2, 'MB/s'),
+            },
+        },
+        4: {
+            'close': {
+                'copy_bw': (32000, -0.2, 0.2, 'MB/s'),
+                'triad_bw': (29500, -0.2, 0.2, 'MB/s'),
+            },
+            'spread': {
+                'copy_bw': (33000, -0.2, 0.2, 'MB/s'),
+                'triad_bw': (30500, -0.2, 0.2, 'MB/s'),
+            },
+        },
+        8: {
+            'close': {
+                'copy_bw': (60000, -0.2, 0.2, 'MB/s'),
+                'triad_bw': (55000, -0.2, 0.2, 'MB/s'),
+            },
+            'spread': {
+                'copy_bw': (62000, -0.2, 0.2, 'MB/s'),
+                'triad_bw': (57000, -0.2, 0.2, 'MB/s'),
+            },
+        },
+    }
+
+    @run_after('setup')
+    def set_executable(self):
+        self.executable = os.path.join(self.stream_binary.stagedir, 'stream.x')
+
+    @run_before('run')
+    def setup_threading(self):
+        self.env_vars['OMP_NUM_THREADS'] = self.num_threads
+        self.env_vars['OMP_PROC_BIND'] = self.thread_placement
+
+    @sanity_function
+    def validate(self):
+        return sn.assert_found(r'Solution Validates', self.stdout)
+
+    @performance_function('MB/s')
+    def copy_bw(self):
+        return sn.extractsingle(r'Copy:\s+(\S+)', self.stdout, 1, float)
+
+    @performance_function('MB/s')
+    def triad_bw(self):
+        return sn.extractsingle(r'Triad:\s+(\S+)', self.stdout, 1, float)
diff --git a/examples/tutorial/stream/stream_parameters.py b/examples/tutorial/stream/stream_parameters.py
index afd3837bf0..037bcb1ff1 100644
--- a/examples/tutorial/stream/stream_parameters.py
+++ b/examples/tutorial/stream/stream_parameters.py
@@ -26,8 +26,8 @@ class stream_test(rfm.RunOnlyRegressionTest):
     valid_systems = ['*']
     valid_prog_environs = ['+openmp']
     stream_binary = fixture(build_stream, scope='environment')
-    num_threads = parameter([1, 2, 4, 8])
-    thread_placement = parameter(['true', 'close', 'spread'])
+    num_threads = parameter([1, 2, 4, 8], type=int)
+    thread_placement = parameter(['true', 'close', 'spread'], type=str)
 
     @run_after('setup')
     def set_executable(self):
diff --git a/reframe/core/exceptions.py b/reframe/core/exceptions.py
index 475978094e..96117e560c 100644
--- a/reframe/core/exceptions.py
+++ b/reframe/core/exceptions.py
@@ -349,6 +349,16 @@ class UnexpectedSuccessError(ReframeError):
     '''Raised when a test unexpectedly passes'''
 
 
+class ReferenceParseError(ReframeError):
+    '''Exception raised when a reference file cannot be parsed.
+
+    .. seealso::
+
+       :attr:`~reframe.core.pipeline.RegressionTest.reference` for details on
+       how to set test references.
+    '''
+
+
 def user_frame():
     '''Return the first user frame as a :py:class:`FrameInfo` object.
 
diff --git a/reframe/core/fields.py b/reframe/core/fields.py
index 02981796bb..2d677ed7d0 100644
--- a/reframe/core/fields.py
+++ b/reframe/core/fields.py
@@ -94,7 +94,8 @@ def _check_type(self, value):
     def __set__(self, obj, value):
         try:
             self._check_type(value)
-        except TypeError:
+        except TypeError as err:
+            last_error = err
             raw_value = remove_convertible(value)
             if raw_value is value and not self._allow_implicit:
                 # value was not convertible and the field does not allow
@@ -106,7 +107,8 @@ def __set__(self, obj, value):
             for t in self._types:
                 try:
                     value = t(value)
-                except TypeError:
+                except TypeError as err:
+                    last_error = err
                     continue
                 else:
                     return super().__set__(obj, value)
@@ -116,7 +118,7 @@ def __set__(self, obj, value):
             raise TypeError(
                 f'failed to set variable {self._name!r}: '
                 f'could not convert to any of the supported types: '
-                f'{typenames}'
+                f'{typenames}: {last_error}'
             )
         else:
             return super().__set__(obj, value)
diff --git a/reframe/core/pipeline.py b/reframe/core/pipeline.py
index b29fb07125..2071a664b1 100644
--- a/reframe/core/pipeline.py
+++ b/reframe/core/pipeline.py
@@ -8,24 +8,30 @@
 #
 
 __all__ = [
-    'CompileOnlyRegressionTest', 'RegressionMixin',
-    'RegressionTest', 'RunOnlyRegressionTest', 'RegressionTestPlugin'
+    'CompileOnlyRegressionTest',
+    'RegressionMixin',
+    'RegressionTest',
+    'RegressionTestDict',
+    'RegressionTestDictType',
+    'RegressionTestPlugin',
+    'RunOnlyRegressionTest'
 ]
 
 
 import glob
 import hashlib
-import inspect
+import functools
 import itertools
 import numbers
 import os
+import re
 import shutil
+import yaml
+from collections import UserDict
 from pathlib import Path
 
-import reframe.core.fields as fields
-import reframe.core.hooks as hooks
-import reframe.core.logging as logging
 import reframe.core.runtime as rt
+import reframe.core.fields as fields
 import reframe.utility as util
 import reframe.utility.jsonext as jsonext
 import reframe.utility.osext as osext
@@ -33,9 +39,10 @@
 import reframe.utility.typecheck as typ
 import reframe.utility.udeps as udeps
 from reframe.core.backends import getlauncher, getscheduler
-from reframe.core.builtins import _XFailReference
+from reframe.core.builtins import _XFailReference, xfail
 from reframe.core.buildsystems import BuildSystemField
 from reframe.core.containers import ContainerPlatform
+from reframe.core.fields import remove_convertible
 from reframe.core.deferrable import (_DeferredExpression,
                                      _DeferredPerformanceExpression)
 from reframe.core.environments import Environment
@@ -44,10 +51,14 @@
                                      SanityError, SkipTestError,
                                      ExpectedFailureError,
                                      UnexpectedSuccessError,
+                                     ReferenceParseError,
                                      ReframeError)
+from reframe.core.hooks import attach_hooks
+from reframe.core.logging import getlogger
 from reframe.core.meta import RegressionTestMeta
 from reframe.core.schedulers import Job
 from reframe.core.warnings import user_deprecation_warning
+from reframe.utility import ScopedDict
 
 
 class _NoRuntime(ContainerPlatform):
@@ -164,6 +175,330 @@ def __init_subclass__(cls):
         )
 
 
+class _KeyMatchingDict(UserDict):
+    '''Dictionary for matching missing keys to existing ones.
+
+    When a key is missing, it is matched against the string keys of the
+    dictionary, which are treated as fully anchored regular expressions.
+
+    If a match is not found the ``default_factory`` callable will be called by
+    passing the missing key as its sole argument.
+    '''
+    def __init__(self, default_factory=None, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._default_factory = default_factory
+
+    def __missing__(self, key):
+        # Use `fullmatch()` rather than wrapping the key in `^...$`, so that
+        # alternations (`a|b`) are anchored on both sides as well
+        for k, v in self.data.items():
+            if (isinstance(k, str) and isinstance(key, str) and
+                re.fullmatch(k, key)):
+                return v
+
+        # If no other key matches, delegate to user callback
+        if self._default_factory:
+            return self._default_factory(key)
+
+        raise KeyError(key)
+
+
+class RegressionTestDict(UserDict):
+    '''A multi-level dictionary indexed on test attributes.
+
+    This is a dictionary with an arbitrary number of levels. Each level's keys
+    are a subset of the possible values that a test's attribute can take. At
+    the top-level there must be a special key ``$index``, which determines the
+    test attributes that correspond to each dictionary level. If the
+    ``$index`` special key is not present, the dictionary behaves as a normal
+    dictionary. Here is an example:
+
+    .. code-block:: python
+
+        xdict = RegressionTestDict({
+            '$index': ('num_tasks', 'myparam'),
+            8: {
+                'foo': 'xyz',
+                'bar': 'xxx'
+            },
+            16: {
+                'foo': 'abc',
+                'bar': 'aaa'
+            }
+        })
+
+    The ``$index`` special key is consumed upon the dictionary's construction
+    and it can later be retrieved explicitly through the :attr:`index`
+    property. The dictionary can be queried as any other Python dictionary,
+    e.g., ``xdict[16]["bar"]`` will give "aaa". However, if keyed with a test,
+    it will determine the values of every level based on indexed test
+    attributes. For example,
+
+    .. code-block:: python
+
+        xdict[test]
+
+    is roughly equivalent to
+
+    .. code-block:: python
+
+        xdict[test.num_tasks][test.myparam]
+
+    If the test does not have the requested attribute or its value is not in
+    the multilevel dictionary, a :py:class:`KeyError` will be raised.
+
+    The ``$index`` of the dictionary can contain the following special
+    attributes which will access information about the current system and
+    environment of the test:
+
+    - ``$system``: equivalent to ``self.current_system.name``
+    - ``$partition``: equivalent to ``self.current_partition.fullname``
+    - ``$environ``: equivalent to ``self.current_environ.name``
+    - ``$processor.<attr>``: equivalent to
+      ``self.current_partition.processor.<attr>``
+    - ``$dev.<devtype>.<attr>``: equivalent to
+      ``self.current_partition.select_devices(<devtype>)[0].<attr>``
+
+    Missing index keys
+    ------------------
+
+    If a key is missing and is a string, the dictionary will treat the
+    keys at that level as anchored regular expressions and
+    will match the attribute value against them. The value of the first
+    matching key is used. This allows a single entry to act as a fallback
+    for many keys (e.g. ``r'foo.*'`` will match all keys starting with
+    ``foo``).
+
+    Users can also define a test protocol to handle missing keys instead of
+    raising :class:`KeyError`. This is controlled by the ``protocol`` keyword
+    argument of the dictionary's constructor. The protocol
+    argument is a simple string and once attached, if a key is missing during
+    test-based lookup, the following test callback method will be called, if
+    defined:
+
+    .. code-block:: python
+
+        __<protocol>_missing_<subindex>__(data, key)
+
+    ``data`` is the sub-dictionary at the level of the sub-index and ``key``
+    is the missing key. In case the sub-index has one of the special values
+    above, every special character will be converted to `_` and then the
+    method will be looked up. In the example above, if we constructed the
+    dictionary with ``protocol="proto"`` and tried to retrieve its value with
+    a test with ``num_tasks=32``, the following method would be called to
+    retrieve the missing value:
+
+    .. code-block:: python
+
+        test.__proto_missing_num_tasks__(data={
+            8: {
+                'foo': 'xyz',
+                'bar': 'xxx'
+            },
+            16: {
+                'foo': 'abc',
+                'bar': 'aaa'
+            }
+        }, key=32)
+
+    This method should return the missing value or raise :class:`KeyError`.
+
+    Constructor arguments
+    ----------------------
+
+    :user_dict: The user dictionary to be converted to a
+        :class:`RegressionTestDict`.
+    :protocol: The protocol to be used to handle missing keys.
+    '''
+    def __init__(self, user_dict: dict = None, protocol: str = None):
+        super().__init__(user_dict or {})
+        self._protocol = protocol
+        self._index = self.data.pop('$index', None)
+
+        ref3_type = typ.Tuple[~Deferrable, ~Deferrable, ~Deferrable]
+        ref4_type = typ.Tuple[~Deferrable, ~Deferrable, ~Deferrable, ~Deferrable]
+        reftuple_type = ref3_type | ref4_type | XfailRef
+        if self._index is None:
+            dict_type = typ.Dict[str, typ.Dict[str, reftuple_type]]
+        else:
+            dict_type = typ.Dict[str, reftuple_type]
+            for _ in self._index:
+                dict_type = typ.Dict[~Deferrable, dict_type]
+
+        if not isinstance(self.data, dict_type):
+            raise TypeError(f'user dictionary {self.data} does not match type {dict_type}')
+
+    @property
+    def protocol(self):
+        '''The protocol associated with this dictionary, :obj:`None` otherwise'''
+        return self._protocol
+
+    @property
+    def index(self):
+        '''The index associated with this dictionary, :obj:`None` otherwise'''
+        return self._index
+
+    def __getitem__(self, key):
+        '''Plain lookup, unless ``key`` is a test: then walk the index.'''
+        if not isinstance(key, RegressionTestPlugin):
+            return super().__getitem__(key)
+
+        if not self._index:
+            raise KeyError(key)
+
+        test = key
+        try:
+            data = self.data
+            for subkey in self._index:
+                # Attach user's callback for missing keys converting also the
+                # special keys
+                if subkey.startswith('$'):
+                    _subkey = subkey[1:].replace('.', '_')
+                else:
+                    _subkey = subkey
+
+                user_default_fn = None
+                if self._protocol:
+                    resolve_fn = getattr(test, f'__{self._protocol}_missing_{_subkey}__', None)
+                    if resolve_fn:
+                        user_default_fn = functools.partial(resolve_fn, data)
+
+                data = _KeyMatchingDict(user_default_fn, data)
+                subkey_parts = subkey.split('.')
+                if subkey_parts == ['$system']:
+                    data = data[test.current_system.name]
+                elif subkey_parts == ['$partition']:
+                    data = data[test.current_partition.fullname]
+                elif subkey_parts == ['$environ']:
+                    data = data[test.current_environ.name]
+                elif len(subkey_parts) == 2 and subkey_parts[0] == '$processor':
+                    proc = test.current_partition.processor
+                    data = data[getattr(proc, subkey_parts[1])]
+                elif len(subkey_parts) == 3 and subkey_parts[0] == '$dev':
+                    dev = test.current_partition.select_devices(subkey_parts[1])[0]
+                    data = data[getattr(dev, subkey_parts[2])]
+                else:
+                    data = data[getattr(test, subkey)]
+        except (AttributeError, IndexError):
+            # Unknown attribute or no device of the requested type
+            raise KeyError(subkey) from None
+
+        return data
+
+    def __rfm_json_encode__(self):
+        return self.data
+
+
+def RegressionTestDictType(*args, **kwargs):
+    class RegressionTestDictWithProto(RegressionTestDict):
+        __init__ = functools.partialmethod(RegressionTestDict.__init__,
+                                           *args, **kwargs)
+
+    return RegressionTestDictWithProto
+
+
+class _ReferenceDict(RegressionTestDict):
+    '''A specialized :class:`RegressionTestDict` that can also handle external
+    references
+
+    An external references file can be specified with the special key
+    ``$ref``.
+    '''
+    def __init__(self, user_dict=None, *, test):
+        # Work on a copy, since the `$ref` key is consumed below; the
+        # referenced file, if any, provides the actual dictionary contents
+        user_dict = dict(user_dict or {})
+        self.__test_entry_name = type(test).__name__
+
+        ref_file = user_dict.pop('$ref', None)
+        if ref_file:
+            ref_prefix = rt.runtime().get_option('general/0/reference_prefix')
+            if ref_prefix is None:
+                ref_prefix = test.prefix
+
+            user_dict = self._read_ref_file(os.path.join(ref_prefix, ref_file))
+
+        try:
+            super().__init__(user_dict, protocol='ref')
+        except TypeError as err:
+            if ref_file:
+                # If we read from a reference file, re-raise the TypeError as
+                # a parse error
+                raise ReferenceParseError(f'{ref_file}: {err}') from err
+            else:
+                raise
+
+    def _read_ref_file(self, filename):
+        def _parse_ref_entry(key, val):
+            try:
+                first = val[0]
+            except (TypeError, LookupError):
+                raise ReferenceParseError(
+                    f'{filename}: invalid reference entry {key!r}: {val}'
+                ) from None
+
+            if first == '$xfail':
+                return xfail(*val[1:-1], tuple(val[-1]))
+            elif isinstance(first, str):
+                raise ReferenceParseError(
+                    f'{filename}: unknown modifier {first!r} in entry {key!r}: {val}'
+                )
+            else:
+                return tuple(val)
+
+        def _entry(key, val, full_key, max_depth, level=0):
+            # NOTE: `level` is tracked explicitly, since keys may contain dots
+            if level == 0 and key == '$index':
+                return tuple(val)
+
+            if level < max_depth and isinstance(val, dict):
+                return {k: _entry(k, v, f'{full_key}.{k}', max_depth, level+1)
+                        for k, v in val.items()}
+
+            if level == max_depth:
+                return _parse_ref_entry(full_key, val)
+
+            return val
+
+        with open(filename) as fp:
+            ref_entries = yaml.safe_load(fp) or {}  # empty file: no entries
+
+        try:
+            ref_yaml = ref_entries[self.__test_entry_name]
+        except KeyError:
+            return {}
+
+        # Convert the yaml references to `reference`
+        mkref = {}
+
+        # Search for an index
+        if '$index' in ref_yaml:
+            index = tuple(ref_yaml['$index'])
+            mkref['$index'] = index
+            max_level = len(index)
+        else:
+            max_level = 1
+
+        for key, val in ref_yaml.items():
+            mkref[key] = _entry(key, val, f'{self.__test_entry_name}.{key}', max_level)
+
+        return mkref
+
+
+class _ReferenceDictField(fields.TypedField):
+    def __set__(self, obj, value):
+        value = remove_convertible(value)
+        if not isinstance(value, (ScopedDict, _ReferenceDict)):
+            # Raw user dictionary: convert it, falling back to a plain
+            # `ScopedDict` when no custom `$index` is defined
+            value = _ReferenceDict(value, test=obj)
+            if not value.index:
+                value = ScopedDict(value.data)
+
+        # Store exactly once; an already converted value is stored as is
+        return fields.Field.__set__(self, obj, value)
+
+
 class RegressionTest(RegressionTestPlugin, jsonext.JSONSerializable):
     '''Base class for regression tests.
 
@@ -717,7 +1052,7 @@ def pipeline_hooks(cls):
     #: :default: :class:`False`
     local = variable(typ.Bool, value=False)
 
-    #: The set of reference values for this test.
+    #: The performance reference values for this test.
     #:
     #: The reference values are specified as a scoped dictionary keyed on the
     #: performance variables defined in :attr:`perf_patterns` and scoped under
@@ -769,6 +1104,8 @@ def pipeline_hooks(cls):
     #: Marking expected failures
     #: -------------------------
     #:
+    #: .. versionadded:: 4.9
+    #:
     #: You can mark any performance reference tuple as an expected failure
     #: using the :func:`~reframe.core.builtins.xfail` builtin as follows:
     #:
@@ -801,12 +1138,181 @@ def pipeline_hooks(cls):
     #:   expected failure, then the test is successful and its state is
     #:   ``PASS``.
     #:
-    #: :type: A scoped dictionary with system names as scopes, performance
-    #:   variables as keys and reference tuples as values.
+    #: Custom performance reference indexing
+    #: -------------------------------------
+    #:
+    #: .. versionadded:: 4.10
+    #:
+    #: By default, the reference dictionary is indexed by the system and/or
+    #: system/partition combination. However, references may also depend on
+    #: test variables and/or parameters as well as partition configuration
+    #: options, such as the processor and/or device details. In such cases,
+    #: using a custom reference index is desirable as it expresses better the
+    #: intent of the test.
+    #:
+    #: To create a custom indexing, the special ``$index`` key must be added.
+    #: This is a tuple of test attribute names or some special keys, which we
+    #: will describe later. The special ``$index`` key is consumed upon
+    #: creation and the rest of the dictionary must contain as many levels as
+    #: index keys. Each level's keys correspond to possible values of the
+    #: corresponding attribute or the special keys of the index. Here is an
+    #: example test:
+    #:
+    #: .. code-block:: python
+    #:
+    #:    class MyTest(rfm.RunOnlyRegressionTest):
+    #:        p = parameter(['foo', 'bar'])
+    #:        q = parameter([1, 2, 3])
+    #:        reference = {
+    #:            '$index': ('p', 'q'),
+    #:            'foo': {
+    #:                1: {'throughput': (80, -0.1, 0.2, 'it/s')},
+    #:                2: {'throughput': (95, -0.1, 0.2, 'it/s')},
+    #:                3: {'throughput': (100, -0.1, 0.2, 'it/s')},
+    #:            },
+    #:            'bar': {
+    #:                1: {'throughput': (40, -0.1, 0.2, 'it/s')},
+    #:                2: {'throughput': (45, -0.1, 0.2, 'it/s')},
+    #:                3: {'throughput': (50, -0.1, 0.2, 'it/s')},
+    #:            }
+    #:        }
+    #:        ...
+    #:
+    #:        @performance_function('it/s')
+    #:        def throughput(self):
+    #:            return sn.extractsingle(r'Throughput: (\S+)', self.stdout, 1, float)
+    #:
+    #: During the performance pipeline stage, ReFrame will resolve the test
+    #: reference using the test's instance ``p`` and ``q`` attributes and will
+    #: pick the right reference tuples for the defined performance variables.
+    #: Note that ``p`` and ``q`` could be any test attribute and not just test
+    #: parameters or variables. As mentioned previously, in addition to test
+    #: attributes, the following special keys can be used in the ``$index``:
+    #:
+    #: - ``$system``: the current system name (equivalent to
+    #:   ``self.current_system.name``).
+    #: - ``$partition``: the current partition full name (equivalent to
+    #:    ``self.current_partition.fullname``).
+    #: - ``$environ``: the current environment name (equivalent to
+    #:   ``self.current_environ.name``).
+    #: - ``$processor.<attr>``: an attribute of the current partition's
+    #:   :attr:`~reframe.core.systems.SystemPartition.processor` (equivalent to
+    #:   ``self.current_partition.processor.<attr>``).
+    #: - ``$dev.<devtype>.<attr>``: an attribute of a specific
+    #:   :class:`~reframe.core.systems.DeviceInfo` type from the current
+    #:   partition (equivalent to
+    #:   ``self.current_partition.select_devices(<devtype>)[0].<attr>``). For
+    #:   example, the ``$dev.gpu.model`` would retrieve the current partition's
+    #:   GPU model as an index key.
+    #:
+    #: Missing index keys
+    #: ^^^^^^^^^^^^^^^^^^
+    #:
+    #: If a key is missing and is a string, the framework will treat the
+    #: reference dict keys at that level as anchored regular expressions and
+    #: will match the attribute value against them. The value of the first
+    #: matching key is used. This allows a single entry to act as a fallback
+    #: for many keys (e.g. ``r'foo.*'`` will match all keys starting with
+    #: ``foo``).
+    #:
+    #: If at any given level, no key has been matched, the framework will look
+    #: for a special test protocol method defined as
+    #:
+    #: .. code-block:: python
+    #:
+    #:    def __ref_missing_<subindex>__(self, data, key):
+    #:        ...
+    #:
+    #: where ``<subindex>`` is the index name for that level with special
+    #: characters normalized with underscores, ``data`` is the sub-dictionary
+    #: at that level and ``key`` is the attribute value that is being looked
+    #: up. The method must return a value of the same type as the values of the
+    #: ``data`` subdictionary or raise a :class:`KeyError`. The use of such a
+    #: method is useful when a more complex logic is needed to determine the
+    #: actual reference that cannot be captured by a regex. The following
+    #: example shows how this method can be used to achieve the same effect as
+    #: the ``r'foo.*'`` regex:
+    #:
+    #: .. code-block:: python
+    #:
+    #:    def __ref_missing_processor_arch__(self, data, key):
+    #:        if key.startswith('foo'):
+    #:            return data['foo']
+    #:
+    #:        raise KeyError(key)
+    #:
+    #: Finally, if none of the above lookup methods succeeds, no reference
+    #: will be used.
+    #:
+    #: .. _external-references:
+    #:
+    #: External references
+    #: -------------------
+    #:
+    #: .. versionadded:: 4.10
+    #:
+    #: Reference data may be kept in a separate YAML file instead of in the
+    #: test class. This is useful when references need to be kept in a separate
+    #: repository with different visibility or access rules, or when
+    #: reference-only updates are desired so that reference values can be
+    #: edited without changing test code.
+    #:
+    #: External references can be specified by using the ``$ref`` special key
+    #: in the :attr:`reference` dictionary:
+    #:
+    #: .. code-block:: python
+    #:
+    #:     reference = {'$ref': 'refs.yaml'}
+    #:
+    #: The file is looked up under a *reference prefix* directory. This
+    #: directory can be set via the :attr:`~config.general.reference_prefix`
+    #: configuration option or the :envvar:`RFM_REFERENCE_PREFIX`. When not
+    #: explicitly set, it defaults to the test's :attr:`prefix` directory
+    #: (i.e., the directory containing the test file).
+    #:
+    #: A reference file can contain references for multiple tests as in the following example:
+    #:
+    #: .. code-block:: yaml
+    #:
+    #:     TestA:
+    #:       'sys0:part0':
+    #:         copy_bw: [23890, -0.10, 0.30, 'MB/s']
+    #:         triad_bw: [17064, -0.05, 0.50, 'MB/s']
+    #:       'sys0:part1':
+    #:         copy_bw: [30100, -0.10, 0.30, 'MB/s']
+    #:         triad_bw: ['$xfail', 'known issue', [22000, -0.05, 0.50, 'MB/s']]
+    #:
+    #:     TestB:
+    #:       $index: [num_tasks]
+    #:       1:
+    #:         throughput: [80, -0.1, 0.2, 'MB/s']
+    #:       4:
+    #:         throughput: [100, -0.1, 0.2, 'MB/s']
+    #:
+    #: Tests in the reference file are indexed by the class name (e.g.
+    #: ``TestA``) and the references follow the same structure as the
+    #: :attr:`reference` dictionary inside the test, except that reference
+    #: tuples are now lists. A custom index can also be specified for external
+    #: references using the ``$index`` special key as for the inline
+    #: references. Expected failures can also be marked using the syntax:
+    #:
+    #: .. code-block:: yaml
+    #:
+    #:     ['$xfail', 'message', <reftuple>]
+    #:
+    #: For a step-by-step example, see :ref:`howto-reference-index` in the
+    #: :doc:`howto`.
+    #:
+    #: :type: A dictionary with a special structure as described above.
     #:   The elements of reference tuples cannot be deferrable expressions.
     #: :default: ``{}``
     #:
     #: .. note::
+    #:    The reference values dictionary is implemented as a special case of
+    #:    the :class:`RegressionTestDict` that adds support for external
+    #:    references.
+    #:
+    #: .. note::
     #:     .. versionchanged:: 3.0
     #:        The measurement unit is required. The user should explicitly
     #:        specify :class:`None` if no unit is available.
@@ -824,14 +1330,11 @@ def pipeline_hooks(cls):
     #:     .. versionchanged:: 4.9
     #:        Support marking reference tuples as expected failures.
     #:
-    reference = variable(
-        typ.Tuple[~Deferrable, ~Deferrable, ~Deferrable] | XfailRef,
-        typ.Dict[str, typ.Dict[
-            str, typ.Tuple[~Deferrable, ~Deferrable, ~Deferrable, ~Deferrable] | XfailRef
-        ]],
-        XfailRef,
-        field=fields.ScopedDictField, value={}, loggable=False
-    )
+    #:     .. versionadded:: 4.10
+    #:        Support for custom reference indexes as well as external
+    #:        references.
+    reference = variable(_ReferenceDict, field=_ReferenceDictField, value={},
+                         allow_implicit=True, loggable=False)
 
     #: Require that a reference is defined for each system that this test is
     #: run on.
@@ -1250,7 +1753,7 @@ def _add_hooks(cls, stage):
 
         pipeline_hooks = cls._rfm_pipeline_hooks
         fn = getattr(cls, stage)
-        new_fn = hooks.attach_hooks(pipeline_hooks)(fn)
+        new_fn = attach_hooks(pipeline_hooks)(fn)
         setattr(cls, '_rfm_pipeline_fn_' + stage, new_fn)
 
     def __getattribute__(self, name):
@@ -1537,7 +2040,7 @@ def logger(self):
 
         You can use this logger to log information for your test.
         '''
-        return logging.getlogger()
+        return getlogger()
 
     @loggable
     @property
@@ -2367,6 +2870,21 @@ def is_performance_check(self):
         '''Return :obj:`True` if the test is a performance test.'''
         return self.perf_variables or hasattr(self, 'perf_patterns')
 
+    def _resolve_reference(self):
+        '''Resolve this test's references into a scoped dictionary'''
+
+        if isinstance(self.reference, ScopedDict):
+            return self.reference
+
+        try:
+            # Scoped, as references are looked up as `<partition>:<var>`
+            return ScopedDict({
+                self.current_partition.fullname: self.reference[self]})
+        except KeyError as err:
+            getlogger().debug(f'reference look up: key `{err}` not found: '
+                              'no reference will be set')
+            return ScopedDict()
+
     @final
     def check_performance(self):
         '''The performance checking phase of the regression test pipeline.
@@ -2393,7 +2911,6 @@ def check_performance(self):
             return
 
         perf_patterns = getattr(self, 'perf_patterns', None)
-
         if perf_patterns is not None and self.perf_variables:
             # We can only make this check here, because variables can be set
             # anywhere in the test before this point.
@@ -2402,13 +2919,15 @@ def check_performance(self):
                 "'perf_variables' in a test"
             )
 
+        reference = self._resolve_reference()
+
         # Convert `perf_patterns` to `perf_variables`
         if perf_patterns:
             for var, expr in self.perf_patterns.items():
                 # Retrieve the unit from the reference tuple
                 key = f'{self._current_partition.fullname}:{var}'
                 try:
-                    unit = self.reference[key][3]
+                    unit = reference[key][3]
                     if unit is None:
                         unit = ''
                 except KeyError:
@@ -2425,7 +2944,7 @@ def check_performance(self):
                     value = expr.evaluate() if not self.is_dry_run() else None
                     unit = expr.unit
                 except Exception as e:
-                    logging.getlogger().warning(
+                    getlogger().warning(
                         f'skipping evaluation of performance variable '
                         f'{tag!r}: {e}'
                     )
@@ -2433,7 +2952,7 @@ def check_performance(self):
 
                 key = f'{self._current_partition.fullname}:{tag}'
                 try:
-                    ref = self.reference[key]
+                    ref = reference[key]
                     if isinstance(ref, _XFailReference):
                         xfailures[key] = ref.message
                         ref = ref.data
@@ -2443,7 +2962,7 @@ def check_performance(self):
                     # the performance function.
                     if len(ref) == 4:
                         if ref[3] != unit:
-                            logging.getlogger().warning(
+                            getlogger().warning(
                                 f'reference unit ({key!r}) for the '
                                 f'performance variable {tag!r} '
                                 f'does not match the unit specified '
@@ -2542,7 +3061,6 @@ def _fmt_errors(errlist, indent=''):
                     f'{key!r} is not a number: {val}'
                 )
 
-            tag = key.split(':')[-1]
             try:
                 sn.evaluate(
                     sn.assert_reference(val, ref, low_thres, high_thres)
@@ -2755,7 +3273,7 @@ def getdep(self, target, environ=None, part=None):
     def skip(self, msg=None):
         '''Skip test.
 
-        :arg msg: A message explaining why the test was skipped.
+        :arg msg: A message explaining why the test was skipped.
 
         .. versionadded:: 3.5.1
         '''
@@ -2930,3 +3448,4 @@ def check_sanity(self):
             self.sanity_patterns = sn.assert_true(1)
 
         super().check_sanity()
+
diff --git a/reframe/frontend/cli.py b/reframe/frontend/cli.py
index 37cfdb3fda..533c2f605b 100644
--- a/reframe/frontend/cli.py
+++ b/reframe/frontend/cli.py
@@ -847,6 +847,13 @@ def main():
         type=float,
         help='Minimum poll rate'
     )
+    argparser.add_argument(
+        dest='reference_prefix',
+        envvar='RFM_REFERENCE_PREFIX',
+        configvar='general/reference_prefix',
+        action='store',
+        help='Directory prefix for external references'
+    )
     argparser.add_argument(
         dest='remote_detect',
         envvar='RFM_REMOTE_DETECT',
diff --git a/reframe/utility/__init__.py b/reframe/utility/__init__.py
index 13a1f52abb..1fe881a794 100644
--- a/reframe/utility/__init__.py
+++ b/reframe/utility/__init__.py
@@ -951,8 +951,8 @@ class ScopedDict(UserDict):
 
     '''
 
-    def __init__(self, mapping={}, scope_sep=':', global_scope='*'):
-        super().__init__(mapping)
+    def __init__(self, mapping=None, scope_sep=':', global_scope='*'):
+        super().__init__(mapping or {})
         self._scope_sep = scope_sep
         self._global_scope = global_scope
 
diff --git a/reframe/utility/jsonext.py b/reframe/utility/jsonext.py
index 997fde23f6..351aebe456 100644
--- a/reframe/utility/jsonext.py
+++ b/reframe/utility/jsonext.py
@@ -35,7 +35,10 @@ def encode_dict(obj, *, recursive=False):
 
     Use the recursive option to also check the keys in nested dicts.
     '''
-    # FIXME: Need to add support for a decode_dict functionality
+
+    if hasattr(obj, '__rfm_json_encode__'):
+        return obj.__rfm_json_encode__()
+
     if isinstance(obj, MutableMapping):
         if recursive or any(isinstance(k, tuple) for k in obj):
             newobj = type(obj)()
diff --git a/unittests/resources/checks_unlisted/xfailures.py b/unittests/resources/checks_unlisted/xfailures.py
index 5655bf8363..64d83a750d 100644
--- a/unittests/resources/checks_unlisted/xfailures.py
+++ b/unittests/resources/checks_unlisted/xfailures.py
@@ -35,9 +35,13 @@ def set_references(self):
         if self.status == 'fail':
             self.reference['*:perf'] = (9, 0, 0, 'GB/s')
         elif self.status == 'xfail':
-            self.reference['*:perf'] = builtins.xfail('xfail perf', (9, 0, 0, 'GB/s'))
+            self.reference['*:perf'] = builtins.xfail(
+                'xfail perf', (9, 0, 0, 'GB/s')
+            )
         elif self.status == 'xpass':
-            self.reference['*:perf'] = builtins.xfail('xfail perf', self.reference['*:perf'])
+            self.reference['*:perf'] = builtins.xfail(
+                'xfail perf', self.reference['*:perf']
+            )
 
     @performance_function('GB/s')
     def perf(self):
diff --git a/unittests/test_pipeline.py b/unittests/test_pipeline.py
index cf73fc4562..7b175efd53 100644
--- a/unittests/test_pipeline.py
+++ b/unittests/test_pipeline.py
@@ -7,6 +7,8 @@
 import pytest
 import re
 import sys
+import yaml
+from pathlib import Path, PosixPath, WindowsPath
 
 import reframe as rfm
 import reframe.core.builtins as builtins
@@ -14,12 +16,13 @@
 import reframe.utility.osext as osext
 import reframe.utility.sanity as sn
 import unittests.utility as test_util
-from pathlib import Path
+
 
 from reframe.core.exceptions import (BuildError,
                                      ExpectedFailureError,
                                      PerformanceError,
                                      PipelineError,
+                                     ReferenceParseError,
                                      ReframeError,
                                      ReframeSyntaxError,
                                      SanityError,
@@ -781,7 +784,6 @@ class MyTest(rfm.RunOnlyRegressionTest,
 
         @sanity_function
         def validate(self):
-            print(self.stagedir)
             return sn.assert_true(os.path.exists('README.md'))
 
     _run(MyTest(), *local_exec_ctx)
@@ -1151,9 +1153,7 @@ def y(self):
 
     t0 = T0()
     t1 = T1()
-    print('==> running t0')
     _run(t0, *local_exec_ctx)
-    print('==> running t1')
     _run(t1, *local_exec_ctx)
     assert t0.var == 1
     assert t1.var == 1
@@ -1391,6 +1391,17 @@ def dummy_gpu_exec_ctx(testsys_exec_ctx):
     yield partition, environ
 
 
+@pytest.fixture
+def custom_exec_ctx(make_exec_ctx):
+    def _make_ctx(options):
+        make_exec_ctx(test_util.TEST_CONFIG_FILE, 'testsys', options)
+        partition = test_util.partition_by_name('gpu')
+        environ = test_util.environment_by_name('builtin', partition)
+        return partition, environ
+
+    return _make_ctx
+
+
 @pytest.fixture
 def perf_file(tmp_path):
     yield tmp_path / 'perf.out'
@@ -1401,13 +1412,31 @@ def sanity_file(tmp_path):
     yield tmp_path / 'sanity.out'
 
 
+@pytest.fixture
+def ref_file(tmp_path):
+    if sys.version_info >= (3, 12):
+        BasePath = Path
+    elif os.name == 'nt':
+        BasePath = WindowsPath
+    else:
+        BasePath = PosixPath
+
+    class YamlFile(BasePath):
+        def write_yaml(self, yaml_contents):
+            with open(self, 'w') as fp:
+                yaml.dump(yaml_contents, fp)
+
+    path = YamlFile(tmp_path / 'references' / 'mytest.yaml')
+    path.parent.mkdir(exist_ok=True)
+    yield path
+
+
 # NOTE: The following series of tests test the `perf_patterns` syntax, so they
 # should not change to the `@performance_function` syntax`
 
 @pytest.fixture
 def dummytest(testsys_exec_ctx, perf_file, sanity_file, tmp_path):
-    class MyTest(rfm.RunOnlyRegressionTest,
-                 custom_prefix=str(tmp_path)):
+    class MyTest(rfm.RunOnlyRegressionTest, custom_prefix=str(tmp_path)):
         perf_file = variable(str, Path)
         reference = {
             'testsys': {
@@ -1424,17 +1453,6 @@ class MyTest(rfm.RunOnlyRegressionTest,
         # in the class body. Instead, we set them in a post-init hook.
         @run_after('init')
         def set_perf_file(self):
-            self.perf_file = perf_file
-            self.sourcesdir = None
-            self.reference = {
-                'testsys': {
-                    'value1': (1.4, -0.1, 0.1, None),
-                    'value2': (1.7, -0.1, 0.1, None),
-                },
-                'testsys:gpu': {
-                    'value3': (3.1, -0.1, 0.1, None),
-                }
-            }
             self.perf_patterns = {
                 'value1': sn.extractsingle(
                     r'perf1 = (\S+)', perf_file, 1, float
@@ -1446,19 +1464,18 @@ def set_perf_file(self):
                     r'perf3 = (\S+)', perf_file, 1, float
                 )
             }
-            self.sanity_patterns = sn.assert_found(
-                r'result = success', sanity_file
-            )
+            self.sanity_patterns = sn.assert_found(r'result = success',
+                                                   sanity_file)
 
     yield MyTest()
 
 
 @pytest.fixture
-def dummytest_modern(testsys_exec_ctx, perf_file, sanity_file):
+def dummytest_modern(testsys_exec_ctx, perf_file, sanity_file, tmp_path):
     '''Modern version of the dummytest above'''
 
-    class MyTest(rfm.RunOnlyRegressionTest):
-        perf_file = perf_file
+    class MyTest(rfm.RunOnlyRegressionTest, custom_prefix=str(tmp_path)):
+        perf_file = variable(str, Path)
         reference = {
             'testsys': {
                 'value1': (1.4, -0.1, 0.1, None),
@@ -1485,6 +1502,10 @@ def value2(self):
         def value3(self):
             return sn.extractsingle(r'perf3 = (\S+)', perf_file, 1, float)
 
+        @run_after('init')
+        def set_perf_file(self):
+            self.perf_file = perf_file
+
     yield MyTest()
 
 
@@ -1548,14 +1569,14 @@ def test_sanity_multiple_files(dummytest, tmp_path, dummy_gpu_exec_ctx):
     _run_sanity(dummytest, *dummy_gpu_exec_ctx, skip_perf=True)
 
 
-def test_performance_failure(dummytest, sanity_file,
+def test_performance_failure(dummy_perftest, sanity_file,
                              perf_file, dummy_gpu_exec_ctx):
     sanity_file.write_text('result = success\n')
     perf_file.write_text('perf1 = 1.0\n'
                          'perf2 = 1.8\n'
                          'perf3 = 3.3\n')
     with pytest.raises(PerformanceError):
-        _run_sanity(dummytest, *dummy_gpu_exec_ctx)
+        _run_sanity(dummy_perftest, *dummy_gpu_exec_ctx)
 
 
 def test_reference_unknown_tag(dummytest, sanity_file,
@@ -1666,6 +1687,315 @@ class T(rfm.RegressionTest):
             reference = {'*': {'value1': (sn.defer(1), -0.1, -0.1)}}
 
 
+def test_reference_index(dummytest):
+    dummytest.x = 1
+    dummytest.y = 2
+    dummytest.reference = {
+        '$index': ('x', 'y'),
+        1: {2: {'value1': (1., -0.1, 0.1, None)}},
+        2: {2: {'value1': (2., -0.1, 0.1, None)}}
+    }
+    assert dummytest.reference[dummytest]['value1'] == (1., -0.1, 0.1, None)
+
+    dummytest.x = 2
+    assert dummytest.reference[dummytest]['value1'] == (2., -0.1, 0.1, None)
+
+    dummytest.x = 3
+    with pytest.raises(KeyError):
+        dummytest.reference[dummytest]['value1']
+
+
+def test_reference_index_regex(dummytest):
+    dummytest.x = 'foo'
+    dummytest.reference = {
+        '$index': ('x',),
+        'foo.*': {'value1': (1., -0.1, 0.1, None)}
+    }
+    assert dummytest.reference[dummytest]['value1'] == (1., -0.1, 0.1, None)
+
+    dummytest.x = 'foobar'
+    assert dummytest.reference[dummytest]['value1'] == (1., -0.1, 0.1, None)
+
+    # Verify that a strict match is required
+    dummytest.x = 'barfoo'
+    with pytest.raises(KeyError):
+        assert dummytest.reference[dummytest]
+
+
+def test_reference_index_regex_non_str(dummytest):
+    # Both the attribute and the reference key must be strings to be treated
+    # as regexes
+    dummytest.x = 13
+    dummytest.reference = {
+        '$index': ('x',),
+        1: {'value1': (1., -0.1, 0.1, None)}
+    }
+    with pytest.raises(KeyError):
+        dummytest.reference[dummytest]['value1']
+
+    dummytest.reference = {
+        '$index': ('x',),
+        '1.*': {'value1': (1., -0.1, 0.1, None)}
+    }
+    with pytest.raises(KeyError):
+        dummytest.reference[dummytest]['value1']
+
+    dummytest.reference = {
+        '$index': ('x',),
+        '13': {'value1': (1., -0.1, 0.1, None)}
+    }
+    with pytest.raises(KeyError):
+        dummytest.reference[dummytest]['value1']
+
+
+@pytest.fixture(params=[('$system', 'testsys'),
+                        ('$partition', 'testsys:gpu'),
+                        ('$environ', 'builtin'),
+                        ('$processor.arch', 'skylake'),
+                        ('$dev.gpu.model', 'p100')])
+def special_attrs(request):
+    return request.param
+
+
+def test_reference_index_special(dummytest, special_attrs,
+                                 sanity_file, perf_file,
+                                 dummy_gpu_exec_ctx):
+    sanity_file.write_text('result = success\n')
+    perf_file.write_text('perf1 = 1.3\n')
+    attr, expected = special_attrs
+    dummytest.reference = {
+        '$index': (attr,),
+        expected: {
+            'value1': (1.4, -0.1, 0.1, None)
+        }
+    }
+    _run_sanity(dummytest, *dummy_gpu_exec_ctx)
+    assert dummytest.reference[dummytest]['value1'] == (1.4, -0.1, 0.1, None)
+
+
+def test_reference_index_special_unknown_attr(dummytest,
+                                              sanity_file, perf_file,
+                                              dummy_gpu_exec_ctx):
+    sanity_file.write_text('result = success\n')
+    perf_file.write_text('perf1 = 1.3\n')
+    dummytest.reference = {
+        '$index': ('$processor.arch', '$dev.gpu.foo'),
+        'skylake': {
+            'p100': {'value1': (10., -0.1, 0.1, None)}
+        }
+    }
+    _run_sanity(dummytest, *dummy_gpu_exec_ctx)
+    with pytest.raises(KeyError):
+        dummytest.reference[dummytest]
+
+
+@pytest.fixture(params=[('$processor', 'skylake'),
+                        ('$dev', 'p100'), ('$dev.gpu', 'p100')])
+def incomplete_special_attr(request):
+    return request.param
+
+
+def test_reference_index_special_incomplete(dummytest,
+                                            incomplete_special_attr,
+                                            sanity_file, perf_file,
+                                            dummy_gpu_exec_ctx):
+    sanity_file.write_text('result = success\n')
+    perf_file.write_text('perf1 = 1.3\n')
+    index, value = incomplete_special_attr
+    dummytest.reference = {
+        '$index': (index,),
+        value: {
+            'value1': (10., -0.1, 0.1, None)
+        }
+    }
+    _run_sanity(dummytest, *dummy_gpu_exec_ctx)
+    with pytest.raises(KeyError):
+        dummytest.reference[dummytest]
+
+
+def test_reference_index_protocol(dummytest, sanity_file, perf_file,
+                                  dummy_gpu_exec_ctx):
+    class _MyTest(type(dummytest)):
+        reference = {
+            '$index': ('$dev.gpu.model',),
+            'v100': {
+                'value1': (1.4, -0.1, 0.1, None)
+            }
+        }
+
+        def __ref_missing_dev_gpu_model__(self, data, key):
+            # Map p100 to v100 reference values
+            if key == 'p100':
+                return data['v100']
+
+    sanity_file.write_text('result = success\n')
+    perf_file.write_text('perf1 = 1.3\n')
+    test = _MyTest()
+    print(test.reference)
+    _run_sanity(test, *dummy_gpu_exec_ctx)
+    assert test.reference[test]['value1'] == (1.4, -0.1, 0.1, None)
+
+
+@pytest.fixture(params=['absolute', 'relative'])
+def make_path(request, tmp_path):
+    def _make_path(path):
+        if request.param == 'absolute':
+            return path
+
+        return path.relative_to(tmp_path)
+
+    return _make_path
+
+
+def test_reference_external_noindex(dummytest, make_path,
+                                    sanity_file, perf_file, ref_file,
+                                    dummy_gpu_exec_ctx):
+    ref_file.write_yaml({
+        'MyTest': {
+            'testsys': {
+                'value1': [1.4, -0.1, 0.1, None],
+                'value2': [1.7, -0.1, 0.1, None],
+            },
+            'testsys:gpu': {
+                'value3': [3.1, -0.1, 0.1, None],
+            }
+        }
+    })
+    dummytest.reference = {'$ref': make_path(ref_file)}
+    sanity_file.write_text('result = success\n')
+    perf_file.write_text('perf1 = 1.3\n')
+    _run_sanity(dummytest, *dummy_gpu_exec_ctx)
+
+
+def test_reference_external_with_index(dummytest, make_path,
+                                       sanity_file, perf_file, ref_file,
+                                       dummy_gpu_exec_ctx):
+    ref_file.write_yaml({
+        'MyTest': {
+            '$index': ['$processor.arch', '$dev.gpu.model'],
+            'skylake': {
+                'p100': {
+                    'value1': [1.4, -0.1, 0.1, None],
+                    'value2': [1.7, -0.1, 0.1, None]
+                }
+            }
+        }
+    })
+    dummytest.reference = {'$ref': make_path(ref_file)}
+    sanity_file.write_text('result = success\n')
+    perf_file.write_text('perf1 = 1.3\n')
+    perf_file.write_text('perf2 = 1.7\n')
+    _run_sanity(dummytest, *dummy_gpu_exec_ctx)
+
+
+def test_reference_external_xfail(dummytest_modern, sanity_file, perf_file, ref_file,
+                                  dummy_gpu_exec_ctx):
+    ref_file.write_yaml({
+        'MyTest': {
+            '$index': ['$processor.arch', '$dev.gpu.model'],
+            'skylake': {
+                'p100': {
+                    'value1': ['$xfail', 'expected', [1.4, -0.1, 0.1, None]],
+                    'value2': [1.7, -0.1, 0.1, None]
+                }
+            }
+        }
+    })
+    dummytest_modern.reference = {'$ref': ref_file}
+    sanity_file.write_text('result = success\n')
+    perf_file.write_text('perf1 = 1.0\n')
+    perf_file.write_text('perf2 = 1.7\n')
+    _run_sanity(dummytest_modern, *dummy_gpu_exec_ctx)
+
+
+@pytest.fixture(params=[
+    ['$invalid_modifier', 'expected', [1.4, -0.1, 0.1, None]],
+    [],
+    'invalid',
+    123,
+    [1.4],
+    [1.4, -0.1]
+])
+def invalid_ref_entry(request):
+    return request.param
+
+
+def test_reference_external_invalid_ref_entry(dummytest_modern, ref_file,
+                                              invalid_ref_entry):
+    ref_file.write_yaml({
+        'MyTest': {
+            '$index': ['$processor.arch', '$dev.gpu.model'],
+            'skylake': {
+                'p100': {
+                    'value1': invalid_ref_entry,
+                }
+            }
+        }
+    })
+    with pytest.raises(ReferenceParseError):
+        dummytest_modern.reference = {'$ref': ref_file}
+
+
+def test_reference_external_custom_prefix(dummytest_modern, make_path,
+                                          sanity_file, perf_file, ref_file,
+                                          tmp_path, custom_exec_ctx):
+    ref_file.write_yaml({
+        'MyTest': {
+            '$index': ['$processor.arch', '$dev.gpu.model'],
+            'skylake': {
+                'p100': {
+                    'value1': [1.4, -0.1, 0.1, None],
+                    'value2': [1.7, -0.1, 0.1, None]
+                }
+            }
+        }
+    })
+    dummytest_modern.reference = {'$ref': make_path(ref_file)}
+    sanity_file.write_text('result = success\n')
+    perf_file.write_text('perf1 = 1.3\n')
+    perf_file.write_text('perf2 = 1.7\n')
+    _run_sanity(dummytest_modern,
+                *custom_exec_ctx({'general/reference_prefix': tmp_path}))
+
+
+def test_regressiondict_custom_protocol(dummy_gpu_exec_ctx):
+    class _MyTest(rfm.RunOnlyRegressionTest):
+        x = variable(int, value=1)
+        foo = variable(rfm.RegressionTestDictType(protocol='foo'), value={
+            '$index': ('$dev.gpu.model', 'x'),
+            'v100': {
+                2: {'value1': (1.4, -0.1, 0.1, None)},
+                4: {'value1': (2.8, -0.1, 0.1, None)},
+            }
+        }, allow_implicit=True)
+
+        def __foo_missing_dev_gpu_model__(self, data, key):
+            # Map p100 to v100 reference values
+            if key == 'p100':
+                return data['v100']
+
+        def __foo_missing_x__(self, data, key):
+            if key > 4:
+                return data[4]
+
+            raise KeyError(key)
+
+    test = _MyTest()
+    test.x = 2
+    test.setup(*dummy_gpu_exec_ctx)
+    assert test.foo[test] == {'value1': (1.4, -0.1, 0.1, None)}
+
+    test.x = 4
+    assert test.foo[test] == {'value1': (2.8, -0.1, 0.1, None)}
+
+    test.x = 6
+    assert test.foo[test] == {'value1': (2.8, -0.1, 0.1, None)}
+
+    test.x = 1
+    with pytest.raises(KeyError):
+        test.foo[test]
+
+
 def test_performance_invalid_value(dummytest, sanity_file,
                                    perf_file, dummy_gpu_exec_ctx):
     sanity_file.write_text('result = success\n')

From 5149b445b97142a7ed044c864a1248eeaeade136 Mon Sep 17 00:00:00 2001
From: Vasileios Karakasis <vkarak@gmail.com>
Date: Tue, 24 Feb 2026 23:31:18 +0100
Subject: [PATCH 3/3] Add more unit tests + fix coding style issues

---
 reframe/core/pipeline.py    | 69 +++++++++++++++++++++----------------
 reframe/utility/__init__.py |  4 +++
 unittests/test_pipeline.py  | 42 ++++++++++++++++++++--
 unittests/test_utility.py   | 11 ++++++
 4 files changed, 94 insertions(+), 32 deletions(-)

diff --git a/reframe/core/pipeline.py b/reframe/core/pipeline.py
index 2071a664b1..524a55122d 100644
--- a/reframe/core/pipeline.py
+++ b/reframe/core/pipeline.py
@@ -315,22 +315,10 @@ def __init__(self, user_dict: dict = None, protocol: str = None):
         self._protocol = protocol
         self._index = self.data.pop('$index', None)
 
-        ref3_type = typ.Tuple[~Deferrable, ~Deferrable, ~Deferrable]
-        ref4_type = typ.Tuple[~Deferrable, ~Deferrable, ~Deferrable, ~Deferrable]
-        reftuple_type = ref3_type | ref4_type | XfailRef
-        if self._index is None:
-            dict_type = typ.Dict[str, typ.Dict[str, reftuple_type]]
-        else:
-            dict_type = typ.Dict[str, reftuple_type]
-            for _ in self._index:
-                dict_type = typ.Dict[~Deferrable, dict_type]
-
-        if not isinstance(self.data, dict_type):
-            raise TypeError(f'user dictionary {self.data} does not match type {dict_type}')
-
     @property
     def protocol(self):
-        '''The protocol associated with this dictionary, :obj:`None` otherwise'''
+        '''The protocol associated with this dictionary,
+        :obj:`None` otherwise'''
         return self._protocol
 
     @property
@@ -358,7 +346,9 @@ def __getitem__(self, key):
 
                 user_default_fn = None
                 if self._protocol:
-                    resolve_fn = getattr(test, f'__{self._protocol}_missing_{_subkey}__', None)
+                    resolve_fn = getattr(
+                        test, f'__{self._protocol}_missing_{_subkey}__', None
+                    )
                     if resolve_fn:
                         user_default_fn = functools.partial(resolve_fn, data)
 
@@ -371,11 +361,14 @@ def __getitem__(self, key):
                     data = data[test.current_partition.fullname]
                 elif subkey_parts == ['$environ']:
                     data = data[test.current_environ.name]
-                elif len(subkey_parts) == 2 and subkey_parts[0] == '$processor':
+                elif (len(subkey_parts) == 2 and
+                      subkey_parts[0] == '$processor'):
                     proc = test.current_partition.processor
                     data = data[getattr(proc, subkey_parts[1])]
                 elif len(subkey_parts) == 3 and subkey_parts[0] == '$dev':
-                    gpus = test.current_partition.select_devices(subkey_parts[1])[0]
+                    gpus = test.current_partition.select_devices(
+                        subkey_parts[1]
+                    )[0]
                     data = data[getattr(gpus, subkey_parts[2])]
                 else:
                     data = data[getattr(test, subkey)]
@@ -418,8 +411,9 @@ def __init__(self, user_dict=None, *, test):
 
             user_dict = self._read_ref_file(os.path.join(ref_prefix, ref_file))
 
+        super().__init__(user_dict, protocol='ref')
         try:
-            super().__init__(user_dict, protocol='ref')
+            self._check_dict_type()
         except TypeError as err:
             if ref_file:
                 # If we read from a reference file, re-raise the TypeError as
@@ -428,6 +422,22 @@ def __init__(self, user_dict=None, *, test):
             else:
                 raise err
 
+    def _check_dict_type(self):
+        ref3_type = typ.Tuple[~Deferrable, ~Deferrable, ~Deferrable]
+        ref4_type = typ.Tuple[~Deferrable, ~Deferrable,
+                              ~Deferrable, ~Deferrable]
+        reftuple_type = ref3_type | ref4_type | XfailRef
+        if self.index is None:
+            dict_type = typ.Dict[str, typ.Dict[str, reftuple_type]]
+        else:
+            dict_type = typ.Dict[str, reftuple_type]
+            for _ in self.index:
+                dict_type = typ.Dict[~Deferrable, dict_type]
+
+        if not isinstance(self.data, dict_type):
+            raise TypeError(f'user dictionary {self.data} '
+                            f'does not match type {dict_type}')
+
     def _read_ref_file(self, filename):
         def _parse_ref_entry(key, val):
             try:
@@ -441,7 +451,8 @@ def _parse_ref_entry(key, val):
                 return xfail(*val[1:-1], tuple(val[-1]))
             elif isinstance(first, str):
                 raise ReferenceParseError(
-                    f'{filename}: unknown modifier {first!r} in entry {key!r}: {val}'
+                    f'{filename}: unknown modifier {first!r} '
+                    f'in entry {key!r}: {val}'
                 )
             else:
                 return tuple(val)
@@ -451,14 +462,11 @@ def _entry(key, val, full_key, max_depth):
             if level == 0 and key == '$index':
                 return tuple(val)
 
-            if level < max_depth and isinstance(val, dict):
+            if level < max_depth:
                 return {k: _entry(k, v, f'{full_key}.{k}', max_depth)
                         for k, v in val.items()}
 
-            if level == max_depth:
-                return _parse_ref_entry(full_key, val)
-
-            return val
+            return _parse_ref_entry(full_key, val)
 
         with open(filename) as fp:
             ref_entries = yaml.safe_load(fp)
@@ -480,7 +488,8 @@ def _entry(key, val, full_key, max_depth):
             max_level = 1
 
         for key, val in ref_yaml.items():
-            mkref[key] = _entry(key, val, f'{self.__test_entry_name}.{key}', max_level)
+            mkref[key] = _entry(key, val,
+                                f'{self.__test_entry_name}.{key}', max_level)
 
         return mkref
 
@@ -1180,7 +1189,8 @@ def pipeline_hooks(cls):
     #:
     #:        @performance_function('it/s'):
     #:        def throughput(self):
-    #:            return sn.extractsingle(r'Throughput: (\S+)', self.stdout, 1, float)
+    #:            return sn.extractsingle(r'Throughput: (\S+)',
+    #:                                    self.stdout, 1, float)
     #:
     #: During the performance pipeline stage, ReFrame will resolve the test
     #: reference using the test's instance ``p`` and ``q`` attributes and will
@@ -1270,7 +1280,8 @@ def pipeline_hooks(cls):
     #: explicitly set, it defaults to the test's :attr:`prefix` directory
     #: (i.e., the directory containing the test file).
     #:
-    #: A reference file can contain references for multiple tests as in the following example:
+    #: A reference file can contain references for multiple tests as in the
+    #: following example:
     #:
     #: .. code-block:: yaml
     #:
@@ -1678,7 +1689,8 @@ def __rfm_init__(self):
 
         # Static directories of the regression check
         if (self.sourcesdir == 'src' and
-            not os.path.isdir(os.path.join(self._rfm_prefix, self.sourcesdir))):
+            not os.path.isdir(os.path.join(self._rfm_prefix,
+                                           self.sourcesdir))):
             self.sourcesdir = None
 
         # Runtime information of the test
@@ -3448,4 +3460,3 @@ def check_sanity(self):
             self.sanity_patterns = sn.assert_true(1)
 
         super().check_sanity()
-
diff --git a/reframe/utility/__init__.py b/reframe/utility/__init__.py
index 1fe881a794..d5e2cb7e4a 100644
--- a/reframe/utility/__init__.py
+++ b/reframe/utility/__init__.py
@@ -1059,6 +1059,10 @@ def __contains__(self, key):
             return True
 
     def __getitem__(self, key):
+        if not isinstance(key, str):
+            # Only string keys are stored in ScopedDict
+            raise KeyError(key)
+
         try:
             return self._lookup(key)
         except KeyError:
diff --git a/unittests/test_pipeline.py b/unittests/test_pipeline.py
index 7b175efd53..a0a8a773f4 100644
--- a/unittests/test_pipeline.py
+++ b/unittests/test_pipeline.py
@@ -1888,8 +1888,8 @@ def test_reference_external_with_index(dummytest, make_path,
     _run_sanity(dummytest, *dummy_gpu_exec_ctx)
 
 
-def test_reference_external_xfail(dummytest_modern, sanity_file, perf_file, ref_file,
-                                  dummy_gpu_exec_ctx):
+def test_reference_external_xfail(dummytest_modern, sanity_file, perf_file,
+                                  ref_file, dummy_gpu_exec_ctx):
     ref_file.write_yaml({
         'MyTest': {
             '$index': ['$processor.arch', '$dev.gpu.model'],
@@ -1908,6 +1908,30 @@ def test_reference_external_xfail(dummytest_modern, sanity_file, perf_file, ref_
     _run_sanity(dummytest_modern, *dummy_gpu_exec_ctx)
 
 
+def test_reference_external_unknown_test(dummytest_modern, sanity_file,
+                                         perf_file, ref_file,
+                                         dummy_gpu_exec_ctx):
+    ref_file.write_yaml({
+        'MyTest2': {
+            '$index': ['$processor.arch', '$dev.gpu.model'],
+            'skylake': {
+                'p100': {
+                    'value1': [1.4, -0.1, 0.1, None],
+                    'value2': [1.7, -0.1, 0.1, None]
+                }
+            }
+        }
+    })
+    dummytest_modern.reference = {'$ref': ref_file}
+    sanity_file.write_text('result = success\n')
+    perf_file.write_text('perf1 = 1.0\n')
+    perf_file.write_text('perf2 = 1.0\n')
+
+    # This will not raise a PerformanceError as the reference is for another
+    # test
+    _run_sanity(dummytest_modern, *dummy_gpu_exec_ctx)
+
+
 @pytest.fixture(params=[
     ['$invalid_modifier', 'expected', [1.4, -0.1, 0.1, None]],
     [],
@@ -1958,7 +1982,19 @@ def test_reference_external_custom_prefix(dummytest_modern, make_path,
                 *custom_exec_ctx({'general/reference_prefix': tmp_path}))
 
 
-def test_regressiondict_custom_protocol(dummy_gpu_exec_ctx):
+def test_regressiontestdict_normal_key():
+    d = rfm.RegressionTestDict({'a': 1, 'b': 2})
+    assert d['a'] == 1
+    assert d['b'] == 2
+
+
+def test_regressiontestdict_noindex(dummytest):
+    d = rfm.RegressionTestDict({'a': 1, 'b': 2})
+    with pytest.raises(KeyError):
+        d[dummytest]
+
+
+def test_regressiontestdict_custom_protocol(dummy_gpu_exec_ctx):
     class _MyTest(rfm.RunOnlyRegressionTest):
         x = variable(int, value=1)
         foo = variable(rfm.RegressionTestDictType(protocol='foo'), value={
diff --git a/unittests/test_utility.py b/unittests/test_utility.py
index 2ffc733810..a552698171 100644
--- a/unittests/test_utility.py
+++ b/unittests/test_utility.py
@@ -1194,6 +1194,17 @@ def test_scoped_dict_key_resolution():
     assert scoped_dict.scope(None) == {}
 
 
+def test_scoped_dict_non_str_key():
+    scoped_dict = reframe.utility.ScopedDict({
+        'a': {'k1': 1, 'k2': 2},
+        'a:b': {'k1': 3, 'k3': 4},
+        'a:b:c': {'k2': 5, 'k3': 6},
+        '*': {'k1': 7, 'k3': 9, 'k4': 10}
+    })
+    with pytest.raises(KeyError):
+        scoped_dict[3]
+
+
 def test_scoped_dict_setitem():
     scoped_dict = reframe.utility.ScopedDict({
         'a': {'k1': 1, 'k2': 2},