diff --git a/Project.toml b/Project.toml
index da5086c..a2d0710 100644
--- a/Project.toml
+++ b/Project.toml
@@ -24,6 +24,7 @@ MLDatasets = "^0.7.4"
 NLPModels = "0.16, 0.17, 0.18, 0.19, 0.20, 0.21"
 NLPModelsModifiers = "0.7.3"
 Noise = "0.2"
+QuadraticModels = "0.9.15"
 Requires = "1"
 julia = "^1.6.0"
 
diff --git a/src/RegularizedProblems.jl b/src/RegularizedProblems.jl
index 34ebfb2..ee8b0a8 100644
--- a/src/RegularizedProblems.jl
+++ b/src/RegularizedProblems.jl
@@ -44,6 +44,9 @@ function __init__()
   @require ProximalOperators = "a725b495-10eb-56fe-b38b-717eba820537" begin
     include("testset_qp_rand.jl")
   end
+  @require ShiftedProximalOperators = "d4fd37fa-580c-4e43-9b30-361c21aae263" begin
+    include("shiftedProximableNLPModel.jl")
+  end
 end
 
 end
diff --git a/src/shiftedProximableNLPModel.jl b/src/shiftedProximableNLPModel.jl
new file mode 100644
index 0000000..9b58b36
--- /dev/null
+++ b/src/shiftedProximableNLPModel.jl
@@ -0,0 +1,225 @@
+export AbstractShiftedProximableNLPModel, ShiftedProximableQuadraticNLPModel
+export set_sigma!, get_sigma
+
+abstract type AbstractShiftedProximableNLPModel{T, V} <: AbstractRegularizedNLPModel{T, V} end
+
+"""
+    subproblem = ShiftedProximableQuadraticNLPModel(reg_nlp, x; kwargs...)
+
+Given a regularized NLP model `reg_nlp` representing the problem
+
+    minimize f(x) + h(x) subject to l ≤ x ≤ u,
+
+construct a shifted quadratic model around `x`:
+
+    minimize φ(s; x) + ½ σ ‖s‖² + ψ(s; x) + χ(s; [l - x, u - x] ∩ ΔB )
+
+where φ(s ; x) = f(x) + ∇f(x)ᵀs + ½ sᵀBs is a quadratic approximation of f about x,
+ψ(s; x) is either h(x + s) or an approximation of h(x + s),
+‖⋅‖ is the ℓ₂ norm and σ > 0 is the regularization parameter,
+χ(s; [l - x, u - x] ∩ ΔB ) is an indicator function over the intersection of the box [l - x, u - x] and the ball ΔB of radius Δ in the infinity norm.
+In the case where there are no bounds (i.e., l = -∞ and u = +∞), the ball can be defined in any norm.
+
+The ShiftedProximableQuadraticNLPModel is made of the following components:
+
+- `model <: AbstractNLPModel`: represents φ + ½ σ ‖s‖², the quadratic approximation of the smooth part of the objective function (up to the constant term f(x));
+- `h <: ShiftedProximableFunction`: represents ψ, the shifted version of the nonsmooth part of the model;
+- `selected`: the subset of variables to which the regularizer h should be applied (default: all).
+- `χ`: the indicator function of the intersection between the box defined by the bounds and the ball defined by the trust region radius.
+- `Δ`: the trust region radius.
+- `parent`: the original regularized NLP model from which the subproblem was derived.
+
+# Arguments
+- `reg_nlp::AbstractRegularizedNLPModel{T, V}`: the regularized NLP model for which the subproblem is being constructed.
+- `x::V`: the point around which the quadratic model is constructed.
+
+# Keyword Arguments
+- `∇f::VN = nothing`: the gradient of the smooth part of the objective function at `x`. If not provided, it will be computed.
+- `indicator_type::Symbol = :none`: the type of indicator function to use for χ. It can be one of `:box`, `:ball`, or `:none`.
+  - If `:box`, χ is the indicator function over a box.
+  - If `:ball`, χ is the indicator function over a ball.
+  - If `:none`, χ is not included in the model.
+- `tr_norm = NormLinf(T(1))`: the norm to use for the trust region when `indicator_type` is `:ball`. When `indicator_type` is `:box` or `:none`, this argument is ignored.
+- `Δ::T = T(Inf)`: the radius of the trust region. When `indicator_type` is `:none`, this argument is ignored.
+
+The matrix B is constructed as a `LinearOperator` and is the returned value of `hess_op(reg_nlp, x)` (see https://jso.dev/NLPModels.jl/stable/reference/#NLPModels.hess_op).
+φ is constructed as a `QuadraticModel` (see https://github.com/JuliaSmoothOptimizers/QuadraticModels.jl).
+When there are bounds, the shifted bounds l-x and u-x are stored in the metadata of the quadratic model φ.
+
+# NLPModels Interface
+The `ShiftedProximableQuadraticNLPModel` implements the `obj` function from the `NLPModels` interface, which evaluates the objective function φ(s; x) + ½ σ ‖s‖² + ψ(s; x) at a given point s.
+The `obj` function has two additional optional keyword arguments: `skip_sigma` (default: false) and `cauchy` (default: false).
+If `skip_sigma` is true, the term ½ σ ‖s‖² is not included in the evaluation of the objective.
+If `cauchy` is true, the term `B` is not included in the evaluation of the objective.
+"""
+mutable struct ShiftedProximableQuadraticNLPModel{T, V, M <: AbstractNLPModel{T, V}, H <: ShiftedProximalOperators.ShiftedProximableFunction, I, X, P <: AbstractRegularizedNLPModel{T, V}} <:
+               AbstractShiftedProximableNLPModel{T, V}
+  model::M
+  h::H
+  selected::I
+  χ::X
+  Δ::T
+  parent::P
+end
+
+function ShiftedProximableQuadraticNLPModel(
+  reg_nlp::AbstractRegularizedNLPModel{T, V},
+  x::V;
+  ∇f::VN = nothing,
+  indicator_type::Symbol = :none,
+  tr_norm = ProximalOperators.NormLinf(T(1)),
+  Δ::T = T(Inf),
+) where {T, V, VN <: Union{V, Nothing}}
+  @assert indicator_type ∈ (:box, :ball, :none) "indicator_type must be one of :box, :ball, or :none"
+
+  nlp, h, selected = reg_nlp.model, reg_nlp.h, reg_nlp.selected
+
+  # φ(s) + ½ σ ‖s‖²
+  B = hess_op(reg_nlp, x)
+  isnothing(∇f) && (∇f = grad(nlp, x))
+  φ = QuadraticModel(∇f, B, x0 = x, regularize = true)
+
+  # χ(s)
+  l_bound_m_x, u_bound_m_x = φ.meta.lvar, φ.meta.uvar
+  indicator_type = has_bounds(nlp) ? :box : indicator_type
+  χ = nothing
+  if indicator_type == :box
+    χ = BoxIndicatorFunction(zero(l_bound_m_x), zero(u_bound_m_x))
+    @. χ.l = max(nlp.meta.lvar - x, -Δ)
+    @. χ.u = min(nlp.meta.uvar - x, Δ)
+  elseif indicator_type == :ball
+    χ = BallIndicatorFunction(Δ, tr_norm)
+  end
+
+  # ψ(s) + χ(s)
+  # FIXME: the indicator function logic can (and should) be simplified in `ShiftedProximalOperators.jl`...
+  # FIXME: `shifted` call ignores the `selected` argument when there are no bounds!
+  ψ = indicator_type == :box ?
+      ShiftedProximalOperators.shifted(h, x, χ.l, χ.u, selected) :
+      indicator_type == :ball ?
+      ShiftedProximalOperators.shifted(h, x, χ.Δ, χ.norm) :
+      ShiftedProximalOperators.shifted(h, x)
+
+  ShiftedProximableQuadraticNLPModel(φ, ψ, selected, χ, Δ, reg_nlp)
+end
+
+"""
+    shift!(reg_nlp::ShiftedProximableQuadraticNLPModel, x; compute_grad = true)
+
+Update the shifted quadratic model `reg_nlp` at the point `x`.
+i.e. given the shifted quadratic model around `y`:
+
+    minimize φ(s; y) + ½ σ ‖s‖² + ψ(s; y),
+
+update it to be around `x`:
+
+    minimize φ(s; x) + ½ σ ‖s‖² + ψ(s; x).
+
+# Arguments
+- `reg_nlp::ShiftedProximableQuadraticNLPModel`: the shifted quadratic model to be updated.
+- `x::V`: the point around which the shifted quadratic model should be updated.
+
+# Keyword Arguments
+- `compute_grad::Bool = true`: whether the gradient of the smooth part of the model should be updated.
+"""
+function ShiftedProximalOperators.shift!(
+  reg_nlp::ShiftedProximableQuadraticNLPModel{T, V},
+  x::V;
+  compute_grad::Bool = true
+) where {T, V}
+  nlp, h = reg_nlp.parent.model, reg_nlp.parent.h
+  φ, ψ, χ = reg_nlp.model, reg_nlp.h, reg_nlp.χ
+
+  if isa(χ, BoxIndicatorFunction)
+    @. φ.meta.lvar = nlp.meta.lvar - x
+    @. φ.meta.uvar = nlp.meta.uvar - x
+    ShiftedProximalOperators.set_radius!(reg_nlp, reg_nlp.Δ)
+  end
+
+  ShiftedProximalOperators.shift!(ψ, x)
+
+  g = φ.data.c
+  compute_grad && grad!(nlp, x, g)
+
+  φ.data.H = hess_op(nlp, x)
+end
+
+function NLPModels.obj(reg_nlp::AbstractShiftedProximableNLPModel, s::AbstractVector; skip_sigma::Bool = false, cauchy::Bool = false)
+  φ, ψ = reg_nlp.model, reg_nlp.h
+
+  σ_temp = get_sigma(reg_nlp)
+  σ_c = skip_sigma ? zero(σ_temp) : σ_temp
+  set_sigma!(reg_nlp, σ_c)
+
+  φs = cauchy ? dot(φ.data.c, s) + σ_c * dot(s, s)/2 : obj(φ, s)
+  ψs = ψ(s)
+
+  set_sigma!(reg_nlp, σ_temp) # restore original σ
+
+  return φs + ψs
+end
+
+function get_sigma(reg_nlp::ShiftedProximableQuadraticNLPModel{T, V}) where {T, V}
+  φ = reg_nlp.model
+  return φ.data.σ
+end
+
+function set_sigma!(
+  reg_nlp::ShiftedProximableQuadraticNLPModel{T, V},
+  σ::T
+) where {T, V}
+  φ = reg_nlp.model
+  φ.data.σ = σ
+end
+
+function ShiftedProximalOperators.set_radius!(
+  reg_nlp::ShiftedProximableQuadraticNLPModel{T, V},
+  Δ::T
+) where {T, V}
+  φ, ψ, χ = reg_nlp.model, reg_nlp.h, reg_nlp.χ
+
+  # Update Radius
+  reg_nlp.Δ = Δ
+
+  # Update Bounds if necessary
+  if isa(χ, BallIndicatorFunction)
+    ShiftedProximalOperators.set_radius!(ψ, Δ)
+    χ.Δ = Δ
+  elseif isa(χ, BoxIndicatorFunction)
+    @. χ.l = max(φ.meta.lvar, -Δ)
+    @. χ.u = min(φ.meta.uvar, Δ)
+  end
+
+end
+
+# Forward meta getters so they grab info from the smooth model
+for field ∈ fieldnames(NLPModels.NLPModelMeta)
+  meth = Symbol("get_", field)
+  if field == :name
+    @eval NLPModels.$meth(rnlp::ShiftedProximableQuadraticNLPModel) =
+      NLPModels.$meth(rnlp.model) * "/" * string(typeof(rnlp.h).name.wrapper)
+  else
+    @eval NLPModels.$meth(rnlp::ShiftedProximableQuadraticNLPModel) = NLPModels.$meth(rnlp.model)
+  end
+end
+
+# Forward counter getters so they grab info from the smooth model
+for model_type ∈ (ShiftedProximableQuadraticNLPModel,)
+  for counter in fieldnames(Counters)
+    @eval NLPModels.$counter(rnlp::$model_type) = NLPModels.$counter(rnlp.model)
+  end
+end
+
+# Indicator functions logic
+abstract type AbstractIndicatorFunction end
+
+mutable struct BoxIndicatorFunction{V} <: AbstractIndicatorFunction
+  l::V
+  u::V
+end
+
+mutable struct BallIndicatorFunction{T, N} <: AbstractIndicatorFunction
+  Δ::T
+  norm::N
+end
+
diff --git a/test/rmodel_tests.jl b/test/rmodel_tests.jl
index 4cf01d5..7017fd2 100644
--- a/test/rmodel_tests.jl
+++ b/test/rmodel_tests.jl
@@ -24,6 +24,83 @@ using ProximalOperators
   @test neval_grad(rmodel_lbfgs) == 0
 end
 
+@testset "ShiftedProximableQuadraticNLPModel" begin
+
+  # Test for quadratic regularization
+  for bounds in (false, true)
+    model, nls_model, x0 = bpdn_model(1, bounds = bounds)
+    s = ones(get_nvar(model))
+    h = NormL0(1.0)
+    rmodel = RegularizedNLPModel(LBFGSModel(model), h)
+    subproblem = ShiftedProximableQuadraticNLPModel(rmodel, x0)
+
+    @test get_nvar(subproblem) == get_nvar(rmodel)
+
+    obj(subproblem, s)
+    @test neval_obj(subproblem) == neval_obj(subproblem.model)
+
+    @test get_sigma(subproblem) == 0.0
+    set_sigma!(subproblem, 1.0)
+    @test get_sigma(subproblem) == 1.0
+
+    @test obj(subproblem, s; skip_sigma = false) == obj(subproblem, s; skip_sigma = true) + 0.5 * dot(s, s)
+    @test obj(subproblem, s; cauchy = true) == dot(subproblem.model.data.c, s) + 0.5 * dot(s, s) + subproblem.h(s)
+    @test obj(subproblem, s; cauchy = true, skip_sigma = true) == dot(subproblem.model.data.c, s) + subproblem.h(s)
+
+    set_sigma!(subproblem, 0.0)
+    @test obj(subproblem, s; skip_sigma = false) == obj(subproblem, s; skip_sigma = true)
+    @test obj(subproblem, s; cauchy = true, skip_sigma = false) == obj(subproblem, s; cauchy = true, skip_sigma = true)
+
+    x = randn(get_nvar(model))
+    shift!(subproblem, x)
+    @test subproblem.model.data.c == grad(model, x)
+    @test typeof(subproblem.model.data.H) <: LBFGSOperator
+    if bounds == true
+      @test all(subproblem.χ.l .== model.meta.lvar - x)
+      @test all(subproblem.χ.u .== model.meta.uvar - x)
+    end
+
+    # Test allocations
+    @test (@wrappedallocs obj(subproblem, s)) == 0
+    @test (@wrappedallocs shift!(subproblem, x)) == 0
+  end
+
+  # Test for trust regions
+  for bounds in (false, true)
+    model, nls_model, x0 = bpdn_model(1, bounds = bounds)
+    s = ones(get_nvar(model))
+    h = NormL1(1.0)
+    rmodel = RegularizedNLPModel(LBFGSModel(model), h)
+    subproblem = ShiftedProximableQuadraticNLPModel(rmodel, x0, indicator_type = :ball, tr_norm = NormL2(1.0), Δ = 1.0)
+
+    @test get_nvar(subproblem) == get_nvar(rmodel)
+
+    obj(subproblem, s)
+    @test neval_obj(subproblem) == neval_obj(subproblem.model)
+
+    @test subproblem.Δ == 1.0
+    set_radius!(subproblem, 2.0)
+    @test subproblem.Δ == 2.0
+
+    @test obj(subproblem, s; cauchy = true, skip_sigma = true) == dot(subproblem.model.data.c, s) + subproblem.h(s)
+    @test obj(subproblem, s; skip_sigma = false) == obj(subproblem, s; skip_sigma = true)
+    @test obj(subproblem, s; cauchy = true, skip_sigma = false) == obj(subproblem, s; cauchy = true, skip_sigma = true)
+
+    x = randn(get_nvar(model))
+    shift!(subproblem, x)
+    @test subproblem.model.data.c == grad(model, x)
+    @test typeof(subproblem.model.data.H) <: LBFGSOperator
+    if bounds == true
+      @test all(subproblem.χ.l .== max.(model.meta.lvar - x, -subproblem.Δ))
+      @test all(subproblem.χ.u .== min.(model.meta.uvar - x, subproblem.Δ))
+    end
+
+    # Test allocations
+    #@test (@wrappedallocs obj(subproblem, s)) == 0 #FIXME: NormL1B2 allocates...
+    @test (@wrappedallocs shift!(subproblem, x)) == 0
+  end
+end
+
 @testset "Problem combos" begin
   # Test that we can at least instantiate the models
   rnlp, rnls = setup_bpdn_l0()
diff --git a/test/runtests.jl b/test/runtests.jl
index 26f6d60..599d606 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -6,9 +6,21 @@ using ADNLPModels,
   MLDatasets,
   NLPModels,
   NLPModelsModifiers,
-  QuadraticModels
+  QuadraticModels,
+  ShiftedProximalOperators
 using RegularizedProblems
 
+macro wrappedallocs(expr)
+  argnames = [gensym() for a in expr.args]
+  quote
+    function g($(argnames...))
+      $(Expr(expr.head, argnames...))
+      @allocated $(Expr(expr.head, argnames...))
+    end
+    $(Expr(:call, :g, [esc(a) for a in expr.args]...))
+  end
+end
+
 function test_well_defined(model, nls_model, sol)
   @test typeof(model) <: NLPModel
   @test typeof(sol) == typeof(model.meta.x0)