diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py index 0641eb7e..7b5665b1 100644 --- a/kernel_tuner/interface.py +++ b/kernel_tuner/interface.py @@ -65,7 +65,9 @@ pyatf_strategies, random_sample, simulated_annealing, - skopt + skopt, + gen_hybrid_vndx, + gen_adaptive_tabu_greywolf, ) from kernel_tuner.strategies.wrapper import OptAlgWrapper @@ -87,6 +89,8 @@ "firefly_algorithm": firefly_algorithm, "bayes_opt": bayes_opt, "pyatf_strategies": pyatf_strategies, + "hybrid_vndx": gen_hybrid_vndx, + "adaptive_tabu_greywolf": gen_adaptive_tabu_greywolf, } @@ -397,6 +401,8 @@ def __deepcopy__(self, _): * "random_sample" takes a random sample of the search space * "simulated_annealing" simulated annealing strategy * "skopt" uses the minimization methods from `skopt` + * "hybrid_vndx" a hybrid variable neighborhood descent strategy + * "adaptive_tabu_greywolf" an adaptive tabu-guided grey wolf optimization strategy Strategy-specific parameters and options are explained under strategy_options. diff --git a/kernel_tuner/strategies/gen_adaptive_tabu_greywolf.py b/kernel_tuner/strategies/gen_adaptive_tabu_greywolf.py new file mode 100644 index 00000000..e7c915de --- /dev/null +++ b/kernel_tuner/strategies/gen_adaptive_tabu_greywolf.py @@ -0,0 +1,239 @@ +""" +Adaptive Tabu-Guided Grey Wolf Optimization. + +Algorithm generated as part of the paper "Automated Algorithm Design For Auto-Tuning Optimizers". 
+""" + +import random +import math +from collections import deque + +from kernel_tuner.util import StopCriterionReached +from kernel_tuner.strategies import common +from kernel_tuner.strategies.common import CostFunc + + +_options = dict( + budget=("maximum number of evaluations", 5000), + pack_size=("number of wolves", 8), + tabu_factor=("tabu size multiplier", 3), + shake_rate=("base shaking probability", 0.2), + jump_rate=("random jump probability", 0.15), + stagn_limit=("stagnation limit before restart", 80), + restart_ratio=("fraction of pack to restart", 0.3), + t0=("initial temperature", 1.0), + t_decay=("temperature decay rate", 5.0), + t_min=("minimum temperature", 1e-4), + constraint_aware=("constraint-aware optimization (True/False)", True), +) + + +def tune(searchspace, runner, tuning_options): + + options = tuning_options.strategy_options + if "x0" in options: + raise ValueError("Starting point (x0) is not supported for AdaptiveTabuGreyWolf strategy.") + (budget, pack_size, tabu_factor, shake_rate, jump_rate, + stagn_limit, restart_ratio, t0, t_decay, t_min, constraint_aware) = \ + common.get_options(options, _options) + + cost_func = CostFunc(searchspace, tuning_options, runner) + + alg = AdaptiveTabuGreyWolf( + searchspace, cost_func, + budget, pack_size, tabu_factor, + shake_rate, jump_rate, + stagn_limit, restart_ratio, + t0, t_decay, t_min, + constraint_aware, + tuning_options.verbose, + ) + + try: + alg.run() + except StopCriterionReached as e: + if tuning_options.verbose: + print(e) + + return cost_func.results + + +tune.__doc__ = common.get_strategy_docstring("Adaptive Tabu Grey Wolf", _options) + + +class AdaptiveTabuGreyWolf: + + def __init__(self, searchspace, cost_func, + budget, pack_size, tabu_factor, + shake_rate, jump_rate, + stagn_limit, restart_ratio, + t0, t_decay, t_min, + constraint_aware, verbose): + + self.searchspace = searchspace + self.cost_func = cost_func + self.budget = budget + self.pack_size = pack_size + self.tabu = 
deque(maxlen=pack_size * tabu_factor) + self.shake_rate = shake_rate + self.jump_rate = jump_rate + self.stagn_limit = stagn_limit + self.restart_ratio = restart_ratio + self.t0 = t0 + self.t_decay = t_decay + self.t_min = t_min + self.constraint_aware = constraint_aware + self.verbose = verbose + + def evaluate(self, dna): + return self.cost_func(dna, check_restrictions=not self.constraint_aware) + + def sample_valid(self): + while True: + x = list(self.searchspace.get_random_sample(1)[0]) + if not self.constraint_aware or self.searchspace.is_param_config_valid(tuple(x)): + return x + + def repair(self, sol): + if not self.constraint_aware or self.searchspace.is_param_config_valid(tuple(sol)): + return sol + + # try neighbors + for m in ("adjacent", "Hamming", "strictly-adjacent"): + for nb in self.searchspace.get_neighbors(tuple(sol), neighbor_method=m): + if self.searchspace.is_param_config_valid(nb): + return list(nb) + + return self.sample_valid() + + def run(self): + + # initialize pack + pack = [] + num_evals = 0 + + for cfg in self.searchspace.get_random_sample(self.pack_size): + sol = list(cfg) + + try: + val = self.evaluate(sol) + num_evals += 1 + except StopCriterionReached: + raise + + pack.append((sol, val)) + self.tabu.append(tuple(sol)) + + pack.sort(key=lambda x: x[1]) + + best_sol, best_val = pack[0] + stagn = 0 + iteration = 0 + + while num_evals < self.budget: + + iteration += 1 + frac = num_evals / self.budget + + # temperature schedule + T = max(self.t_min, self.t0 * math.exp(-self.t_decay * frac)) + + # reheating + if stagn and stagn % max(1, (self.stagn_limit // 2)) == 0: + T += self.t0 * 0.2 + + # adaptive shaking + shake_p = min(0.5, self.shake_rate * (1 + stagn / self.stagn_limit)) + + pack.sort(key=lambda x: x[1]) + alpha, beta, delta = pack[0][0], pack[1][0], pack[2][0] + + new_pack = [] + + for sol, sol_val in pack: + + # leaders survive + if sol in (alpha, beta, delta): + new_pack.append((sol, sol_val)) + continue + + D = len(sol) + + 
# recombination + child = [ + random.choice((alpha[i], beta[i], delta[i], sol[i])) + for i in range(D) + ] + + # shaking + if random.random() < shake_p: + if random.random() < self.jump_rate: + idx = random.randrange(D) + rnd = random.choice(self.searchspace.get_random_sample(1)) + child[idx] = rnd[idx] + else: + method = "adjacent" if frac < 0.5 else "strictly-adjacent" + nbrs = list(self.searchspace.get_neighbors(tuple(child), neighbor_method=method)) + if nbrs: + child = list(random.choice(nbrs)) + + # repair + child = self.repair(child) + tchild = tuple(child) + + # tabu handling + if tchild in self.tabu: + nbrs = list(self.searchspace.get_neighbors(tchild, neighbor_method="Hamming")) + if nbrs: + child = list(random.choice(nbrs)) + + try: + fch = self.evaluate(child) + num_evals += 1 + except StopCriterionReached: + raise + + self.tabu.append(tuple(child)) + + # SA acceptance + dE = fch - sol_val + if dE <= 0 or random.random() < math.exp(-dE / T): + new_pack.append((child, fch)) + else: + new_pack.append((sol, sol_val)) + + if num_evals >= self.budget: + break + + pack = new_pack + pack.sort(key=lambda x: x[1]) + + # update best + if pack[0][1] < best_val: + best_sol, best_val = pack[0] + stagn = 0 + else: + stagn += 1 + + # restart + if stagn >= self.stagn_limit: + nr = int(math.ceil(self.pack_size * self.restart_ratio)) + + for i in range(self.pack_size - nr, self.pack_size): + sol = self.sample_valid() + + try: + val = self.evaluate(sol) + num_evals += 1 + except StopCriterionReached: + raise + + pack[i] = (sol, val) + self.tabu.append(tuple(sol)) + + pack.sort(key=lambda x: x[1]) + best_sol, best_val = pack[0] + stagn = 0 + + if self.verbose and num_evals % 50 == 0: + print(f"Evaluations: {num_evals}, best: {best_val}") \ No newline at end of file diff --git a/kernel_tuner/strategies/gen_hybrid_vndx.py b/kernel_tuner/strategies/gen_hybrid_vndx.py new file mode 100644 index 00000000..473ffcd5 --- /dev/null +++ b/kernel_tuner/strategies/gen_hybrid_vndx.py 
@@ -0,0 +1,236 @@ +""" +Hybrid VND with surrogate modeling, adaptive neighborhoods, and annealing. + +Algorithm generated as part of the paper "Automated Algorithm Design For Auto-Tuning Optimizers". +""" + +import random +import math +import collections +import heapq + +from kernel_tuner.util import StopCriterionReached +from kernel_tuner.strategies import common +from kernel_tuner.strategies.common import CostFunc + + +_options = dict( + budget=("maximum number of evaluations", 5000), + k=("k for k-NN surrogate", 5), + cand_pool=("candidate pool size", 8), + restart_iter=("iterations before restart", 100), + tabu_size=("tabu list size", 300), + elite_size=("elite set size", 5), + temp0=("initial temperature", 1.0), + cooling=("cooling rate", 0.995), + constraint_aware=("constraint-aware optimization (True/False)", True), +) + + +def tune(searchspace, runner, tuning_options): + + options = tuning_options.strategy_options + if "x0" in options: + raise ValueError("Starting point (x0) is not supported for HybridVNDX strategy.") + budget, k, cand_pool, restart_iter, tabu_size, elite_size, temp0, cooling, constraint_aware = common.get_options(options, _options) + + cost_func = CostFunc(searchspace, tuning_options, runner) + + alg = HybridVNDX( + searchspace, + cost_func, + budget, + k, + cand_pool, + restart_iter, + tabu_size, + elite_size, + temp0, + cooling, + constraint_aware, + tuning_options.verbose, + ) + + try: + alg.run() + except StopCriterionReached as e: + if tuning_options.verbose: + print(e) + + return cost_func.results + + +tune.__doc__ = common.get_strategy_docstring("Hybrid VNDX", _options) + + +class HybridVNDX: + + def __init__(self, searchspace, cost_func, budget, k, cand_pool, + restart_iter, tabu_size, elite_size, temp0, cooling, + constraint_aware, verbose): + + self.searchspace = searchspace + self.cost_func = cost_func + self.budget = budget + self.k = k + self.cand_pool = cand_pool + self.restart_iter = restart_iter + self.tabu_size = tabu_size 
+ self.elite_size = elite_size + self.temp0 = temp0 + self.cooling = cooling + self.constraint_aware = constraint_aware + self.verbose = verbose + + self.neighbor_methods = ["strictly-adjacent", "adjacent", "Hamming"] + + def sample_valid(self): + while True: + x = list(self.searchspace.get_random_sample(1)[0]) + if not self.constraint_aware or self.searchspace.is_param_config_valid(tuple(x)): + return x + + def repair(self, x): + if not self.constraint_aware or self.searchspace.is_param_config_valid(tuple(x)): + return x + for _ in range(5): + y = list(self.searchspace.get_random_sample(1)[0]) + if self.searchspace.is_param_config_valid(tuple(y)): + return y + return x + + def knn_predict(self, tpl, history): + dists = [(sum(a != b for a, b in zip(tpl, xh)), fh) for xh, fh in history] + dists.sort(key=lambda z: z[0]) + top = dists[:self.k] + return sum(f for _, f in top) / len(top) + + def pick_nm(self, nm_weight): + total = sum(nm_weight.values()) + r = random.random() * total + cum = 0 + for nm, w in nm_weight.items(): + cum += w + if r <= cum: + return nm + return self.neighbor_methods[-1] + + def evaluate(self, dna): + return self.cost_func(dna, check_restrictions=not self.constraint_aware) + + def run(self): + + curr = self.sample_valid() + curr_f = self.evaluate(curr) + + best = list(curr) + best_f = curr_f + + history = [(tuple(curr), curr_f)] + + tabu = collections.deque(maxlen=self.tabu_size) + tabu.append(tuple(curr)) + + elite = [(curr_f, tuple(curr))] + + nm_weight = {nm: 1.0 for nm in self.neighbor_methods} + + no_improve = 0 + temp = self.temp0 + num_evals = 1 + + while num_evals < self.budget: + + nm = self.pick_nm(nm_weight) + + # generate candidate pool + pool = [] + + nbrs = self.searchspace.get_neighbors(tuple(curr), neighbor_method=nm) or [] + if nbrs: + nsel = min(len(nbrs), self.cand_pool // 2) + pool += random.sample(nbrs, nsel) + + # elite crossover + if len(elite) >= 2: + (_, x1), (_, x2) = random.sample(elite, 2) + child = 
[random.choice((a, b)) for a, b in zip(x1, x2)] + pool.append(tuple(child)) + + # random fill + while len(pool) < self.cand_pool: + pool.append(tuple(self.searchspace.get_random_sample(1)[0])) + + # repair + deduplicate + seen = set() + clean = [] + for c in pool: + rc = tuple(self.repair(list(c))) + if rc not in seen: + seen.add(rc) + clean.append(rc) + + # surrogate scoring + scored = [] + for c in clean: + s = self.knn_predict(c, history) + if c in tabu: + s += abs(s) * 0.1 + 1e3 + scored.append((s, c)) + + _, cand_tpl = min(scored, key=lambda x: x[0]) + cand = list(cand_tpl) + + try: + f_c = self.evaluate(cand) + num_evals += 1 + except StopCriterionReached: + raise + + history.append((tuple(cand), f_c)) + + # update elite: keep the elite_size lowest-cost configs + elite.append((f_c, tuple(cand))) + if len(elite) > self.elite_size: + elite.remove(max(elite, key=lambda e: e[0])) + + # acceptance (SA-style) + delta = f_c - curr_f + accept = (delta < 0) or (random.random() < math.exp(-delta / max(temp, 1e-8))) + + if accept: + tabu.append(tuple(cand)) + curr, curr_f = list(cand), f_c + + nm_weight[nm] *= 1.1 + no_improve = 0 + + if f_c < best_f: + best, best_f = list(cand), f_c + else: + nm_weight[nm] *= 0.9 + no_improve += 1 + + # normalize weights + if sum(nm_weight.values()) > 1e6: + for k in nm_weight: + nm_weight[k] *= 1e-6 + + temp *= self.cooling + + # restart + if no_improve >= self.restart_iter: + curr = self.sample_valid() + curr_f = self.evaluate(curr) + num_evals += 1 + + history.append((tuple(curr), curr_f)) + + tabu.clear() + tabu.append(tuple(curr)) + + no_improve = 0 + temp = self.temp0 + + if self.verbose and num_evals % 50 == 0: + print(f"Evaluations: {num_evals}, best: {best_f}") \ No newline at end of file diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py index ea5a2994..56dfc7b8 100644 --- a/test/strategies/test_strategies.py +++ b/test/strategies/test_strategies.py @@ -127,7 +127,7 @@ def test_strategies(vector_add, strategy): # check if strategy respects 
user-specified starting point (x0) x0 = [256, 'alg_2', 15, True, 2.45] filter_options["x0"] = x0 - if not strategy in ["brute_force", "random_sample", "bayes_opt", "pyatf_strategies"]: + if not strategy in ["brute_force", "random_sample", "bayes_opt", "pyatf_strategies", "adaptive_tabu_greywolf", "hybrid_vndx"]: results, _ = kernel_tuner.tune_kernel(*vector_add, restrictions=restrictions, strategy=strategy, strategy_options=filter_options, verbose=False, cache=cache_filename, simulation_mode=True) assert results[0]["block_size_x"] == x0[0]