BPCG with direct solve and Wolfe step
Added Wolfe step
pokutta authored Nov 6, 2024
1 parent 3bcaffd commit 013dc15
Showing 17 changed files with 285 additions and 84 deletions.
138 changes: 138 additions & 0 deletions examples/blended_pairwise_with_direct_solve_wolfe.jl
@@ -0,0 +1,138 @@
#=
This example demonstrates the use of the Blended Pairwise Conditional Gradient algorithm
with direct solve steps for a quadratic optimization problem over a sparse polytope, where the objective is not a standard quadratic.
The example showcases how the algorithm balances between:
- Pairwise steps for efficient optimization
- Periodic direct solves for handling the quadratic objective
- Lazy (approximate) linear minimization steps for improved iteration complexity
It also demonstrates how to set up custom callbacks for tracking algorithm progress.
=#

using FrankWolfe
using LinearAlgebra
using Random

import HiGHS
import MathOptInterface as MOI

include("../examples/plot_utils.jl")

n = Int(1e2)
k = 10000

# s = rand(1:100)
s = 10
@info "Seed $s"
Random.seed!(s)

A = let
A = randn(n, n)
A' * A
end
@assert isposdef(A)

const y = Random.rand(Bool, n) * 0.6 .+ 0.3

function f(x)
d = x - y
return dot(d, A, d)
end

function grad!(storage, x)
mul!(storage, A, x)
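# five-argument mul!: storage ← -2*A*y + 2*storage, i.e. ∇f(x) = 2A(x - y)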
return mul!(storage, A, y, -2, 2)
end


xpi = rand(n);
total = sum(xpi);

xp = xpi ./ total;
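
# NOTE: the definitions below shadow the quadratic objective above; the runs
# use f(x) = ||x - xp||^2 with gradient 2(x - xp), matching the Hessian 2I and
# linear term -2xp passed to the Wolfe active set further down.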

f(x) = norm(x - xp)^2
function grad!(storage, x)
@. storage = 2 * (x - xp)
end

lmo = FrankWolfe.KSparseLMO(5, 500.0)

## other LMOs to try
lmo = FrankWolfe.KSparseLMO(10, big"500.0")
# lmo = FrankWolfe.LpNormLMO{Float64,5}(100.0)
# lmo = FrankWolfe.ProbabilitySimplexOracle(100.0);
# lmo = FrankWolfe.UnitSimplexOracle(10000.0);

x00 = FrankWolfe.compute_extreme_point(lmo, rand(n))


function build_callback(trajectory_arr)
return function callback(state, active_set, args...)
return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set)))
end
end
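# The callback appends the algorithm state together with the current
# active-set size at every iteration; `args...` absorbs any extra arguments
# the algorithm passes after the active set.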


trajectoryBPCG_standard = []
x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
f,
grad!,
lmo,
copy(x00),
max_iteration=k,
verbose=true,
callback=build_callback(trajectoryBPCG_standard),
);


active_set_quadratic_automatic_standard = FrankWolfe.ActiveSetQuadraticLinearSolve(
FrankWolfe.ActiveSet([(1.0, copy(x00))]),
grad!,
MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
)
trajectoryBPCG_quadratic_automatic_standard = []
x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
f,
grad!,
lmo,
active_set_quadratic_automatic_standard,
max_iteration=k,
verbose=true,
callback=build_callback(trajectoryBPCG_quadratic_automatic_standard),
);


active_set_quadratic_wolfe = FrankWolfe.ActiveSetQuadraticLinearSolve(
FrankWolfe.ActiveSet([(1.0, copy(x00))]),
2I, -2xp,
MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=1),
wolfe_step=true,
)
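# For f(x) = ||x - xp||^2 the gradient is 2x - 2xp, so the Hessian 2I and
# linear term -2xp are passed explicitly instead of being detected from grad!;
# wolfe_step=true switches the periodic direct solve to the affine-hull
# (Wolfe) variant.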
trajectoryBPCG_quadratic_wolfe = []
x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
f,
grad!,
lmo,
active_set_quadratic_wolfe,
max_iteration=k,
verbose=true,
callback=build_callback(trajectoryBPCG_quadratic_wolfe),
);

dataSparsity = [
trajectoryBPCG_standard,
trajectoryBPCG_quadratic_automatic_standard,
trajectoryBPCG_quadratic_wolfe,
]
labelSparsity = [
"BPCG (Standard)",
"BPCG (Direct Solve)",
"BPCG (Wolfe Step)",
]

# Plot trajectories
plot_trajectories(dataSparsity, labelSparsity, xscalelog=false)

# plot_sparsity(dataSparsity, labelSparsity, xscalelog=false)
88 changes: 75 additions & 13 deletions src/active_set_quadratic_direct_solve.jl
@@ -1,4 +1,3 @@

"""
ActiveSetQuadraticLinearSolve{AT, R, IT}
@@ -12,6 +11,8 @@ so that the gradient is `∇f(x)=Ax+b`.
This active set stores an inner `active_set` that keeps track of the current set of vertices and convex decomposition.
It therefore delegates all update, deletion, and addition operations to this inner active set.
The `weight`, `atoms`, and `x` fields should only be accessed for reading and are effectively the same objects as those in the inner active set.
The flag `wolfe_step` determines whether to use a Wolfe step from the min-norm point algorithm or the normal direct solve.
The Wolfe step solves the auxiliary subproblem over the affine hull of the current active set (instead of the convex hull).
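Concretely, the Wolfe step drops the `λ ≥ 0` constraints from the auxiliary subproblem; if the resulting affine minimizer has negative weights, a ratio-test line search moves from the current weights toward it until the first weight reaches zero.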
The structure also contains a scheduler struct which is passed to the `should_solve_lp` function.
To change the frequency at which the LP is solved, define another scheduler struct and implement the corresponding method.
@@ -33,6 +34,7 @@ struct ActiveSetQuadraticLinearSolve{
b::BT # linear term
active_set::AS
lp_optimizer::OT
wolfe_step::Bool
scheduler::SF
counter::Base.RefValue{Int}
end
@@ -47,9 +49,10 @@ function ActiveSetQuadraticLinearSolve(
grad!::Function,
lp_optimizer;
scheduler=LogScheduler(),
wolfe_step=false,
) where {AT,R}
A, b = detect_quadratic_function(grad!, tuple_values[1][2])
return ActiveSetQuadraticLinearSolve(tuple_values, A, b, lp_optimizer, scheduler=scheduler)
return ActiveSetQuadraticLinearSolve(tuple_values, A, b, lp_optimizer, scheduler=scheduler, wolfe_step=wolfe_step)
end

"""
@@ -63,6 +66,7 @@ function ActiveSetQuadraticLinearSolve(
b,
lp_optimizer;
scheduler=LogScheduler(),
wolfe_step=false,
) where {AT,R,H}
inner_as = ActiveSetQuadraticProductCaching(tuple_values, A, b)
return ActiveSetQuadraticLinearSolve(
@@ -73,6 +77,7 @@
inner_as.b,
inner_as,
lp_optimizer,
wolfe_step,
scheduler,
Ref(0),
)
@@ -84,6 +89,7 @@
b,
lp_optimizer;
scheduler=LogScheduler(),
wolfe_step=false,
)
as = ActiveSetQuadraticLinearSolve(
inner_as.weights,
@@ -93,6 +99,7 @@
b,
inner_as,
lp_optimizer,
wolfe_step,
scheduler,
Ref(0),
)
@@ -106,6 +113,7 @@
b,
lp_optimizer;
scheduler=LogScheduler(),
wolfe_step=false,
)
as = ActiveSetQuadraticLinearSolve(
inner_as.weights,
@@ -115,6 +123,7 @@
b,
inner_as,
lp_optimizer,
wolfe_step,
scheduler,
Ref(0),
)
@@ -127,9 +136,10 @@
grad!::Function,
lp_optimizer;
scheduler=LogScheduler(),
wolfe_step=false,
)
A, b = detect_quadratic_function(grad!, inner_as.atoms[1])
return ActiveSetQuadraticLinearSolve(inner_as, A, b, lp_optimizer; scheduler=scheduler)
return ActiveSetQuadraticLinearSolve(inner_as, A, b, lp_optimizer; scheduler=scheduler, wolfe_step=wolfe_step)
end

function ActiveSetQuadraticLinearSolve{AT,R}(
@@ -269,7 +279,9 @@ function solve_quadratic_activeset_lp!(
MOI.empty!(o)
λ = MOI.add_variables(o, nv)
# ∑ λ == 1; λ ≥ 0 is added only for the standard direct solve (dropped for a Wolfe step)
MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0))
if !as.wolfe_step
MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0))
end
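# For a Wolfe step the λ ≥ 0 bounds are dropped, so the system is solved over
# the affine hull of the active atoms; feasibility is restored afterwards by
# the ratio test in `_compute_new_weights_wolfe_step`.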
sum_of_variables = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0)
MOI.add_constraint(o, sum_of_variables, MOI.EqualTo(1.0))
# Vᵗ A V λ == -Vᵗ b
@@ -284,15 +296,30 @@
)
end
rhs = -dot(atom, as.b)
MOI.add_constraint(o, lhs, MOI.EqualTo(rhs))
MOI.add_constraint(o, lhs, MOI.EqualTo{Float64}(rhs))
end
MOI.set(o, MOI.ObjectiveFunction{typeof(sum_of_variables)}(), sum_of_variables)
MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE)
MOI.optimize!(o)
if MOI.get(o, MOI.TerminationStatus()) ∉ (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL)
return as
end
indices_to_remove = Int[]
indices_to_remove, new_weights = if as.wolfe_step
_compute_new_weights_wolfe_step(λ, R, as.weights, o)
else
_compute_new_weights_direct_solve(λ, R, o)
end
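# Shared epilogue for both paths: remove the zeroed atoms, write the solved
# weights back, then purge tiny weights, renormalize, and recompute the iterate.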
deleteat!(as.active_set, indices_to_remove)
@assert length(as) == length(new_weights)
update_weights!(as.active_set, new_weights)
active_set_cleanup!(as)
active_set_renormalize!(as)
compute_active_set_iterate!(as)
return as
end

function _compute_new_weights_direct_solve(λ, ::Type{R}, o::MOI.AbstractOptimizer) where {R}
indices_to_remove = BitSet()
new_weights = R[]
for idx in eachindex(λ)
weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx])
@@ -302,13 +329,48 @@
push!(new_weights, weight_value)
end
end
deleteat!(as.active_set, indices_to_remove)
@assert length(as) == length(new_weights)
update_weights!(as.active_set, new_weights)
active_set_cleanup!(as)
active_set_renormalize!(as)
compute_active_set_iterate!(as)
return as
return indices_to_remove, new_weights
end

function _compute_new_weights_wolfe_step(λ, ::Type{R}, old_weights, o::MOI.AbstractOptimizer) where {R}
wolfe_weights = MOI.get.(o, MOI.VariablePrimal(), λ)
# all nonnegative -> use non-wolfe procedure
if all(>=(-10eps()), wolfe_weights)
return _compute_new_weights_direct_solve(λ, R, o)
end
# ratio test to know which coordinate would hit zero first
tau_min = 1.0
set_indices_zero = BitSet()
for idx in eachindex(λ)
if wolfe_weights[idx] < old_weights[idx]
tau = old_weights[idx] / (old_weights[idx] - wolfe_weights[idx])
if abs(tau - tau_min) ≤ 2weight_purge_threshold_default(typeof(tau))
push!(set_indices_zero, idx)
elseif tau < tau_min
tau_min = tau
empty!(set_indices_zero)
push!(set_indices_zero, idx)
end
end
end
@assert length(set_indices_zero) >= 1
new_lambdas = [(1 - tau_min) * old_weights[idx] + tau_min * wolfe_weights[idx] for idx in eachindex(λ)]
for idx in set_indices_zero
new_lambdas[idx] = 0
end
@assert all(>=(-2weight_purge_threshold_default(eltype(new_lambdas))), new_lambdas) "All new_lambdas must be nonnegative; minimum is $(minimum(new_lambdas))"
@assert isapprox(sum(new_lambdas), 1.0) "The sum of new_lambdas must be approximately 1"
indices_to_remove = Int[]
new_weights = R[]
for idx in eachindex(λ)
weight_value = new_lambdas[idx] # using new lambdas
if weight_value <= eps()
push!(indices_to_remove, idx)
else
push!(new_weights, weight_value)
end
end
return indices_to_remove, new_weights
end
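# Worked illustration of the ratio test above (assumed toy values, not part of
# this commit): with old weights [0.5, 0.5] and affine-solve weights
# [1.2, -0.2], only the second weight decreases, so τ = 0.5/(0.5 - (-0.2)) = 5/7.
# Blending gives (1 - τ) .* [0.5, 0.5] .+ τ .* [1.2, -0.2] = [1.0, 0.0]:
# the weights still sum to one and the second atom leaves the active set.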

function _compute_quadratic_constraint_term(atom1, A::AbstractMatrix, atom2, λ)
2 changes: 2 additions & 0 deletions src/types.jl
@@ -103,6 +105,8 @@ function Base.copyto!(dst::SparseArrays.SparseVector, src::ScaledHotVector)
return dst
end

SparseArrays.issparse(::ScaledHotVector) = true
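# A ScaledHotVector has a single structural nonzero, so it is treated as
# sparse for dispatch purposes.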

function active_set_update_scale!(x::IT, lambda, atom::ScaledHotVector) where {IT}
x .*= (1 - lambda)
x[atom.val_idx] += lambda * atom.active_val
12 changes: 6 additions & 6 deletions test/active_set.jl
@@ -4,7 +4,7 @@ using FrankWolfe
import FrankWolfe: ActiveSet
using LinearAlgebra: norm

@testset "Active sets " begin
@testset "Active sets" begin

@testset "Constructors and eltypes" begin
active_set = ActiveSet([(0.1, [1, 2, 3]), (0.9, [2, 3, 4]), (0.0, [5, 6, 7])])
@@ -100,7 +100,7 @@
end
end

@testset "Simplex gradient descent " begin
@testset "Simplex gradient descent" begin
# Gradient descent over a 2-D unit simplex
# each atom is a vertex, direction points to [1,1]
# note: integers for atom element types
@@ -175,7 +175,7 @@
end
end

@testset "LP separation oracle " begin
@testset "LP separation oracle" begin
# Gradient descent over a L-inf ball of radius one
# current active set contains 3 vertices
# direction points to [1,1]
@@ -212,7 +212,7 @@
)
end

@testset "Argminmax " begin
@testset "Argminmax" begin
active_set = FrankWolfe.ActiveSet([(0.6, [-1, -1]), (0.2, [0, 1]), (0.2, [1, 0])])
(λ_min, a_min, i_min, val, λ_max, a_max, i_max, valM, progress) =
FrankWolfe.active_set_argminmax(active_set, [1, 1.5])
@@ -221,7 +221,7 @@
@test_throws ErrorException FrankWolfe.active_set_argminmax(active_set, [NaN, NaN])
end

@testset "LPseparationWithScaledHotVector " begin
@testset "LPseparationWithScaledHotVector" begin
v1 = FrankWolfe.ScaledHotVector(1, 1, 2)
v2 = FrankWolfe.ScaledHotVector(1, 2, 2)
v3 = FrankWolfe.ScaledHotVector(0, 2, 2)
@@ -240,7 +240,7 @@
)
end

@testset "ActiveSet for BigFloat " begin
@testset "ActiveSet for BigFloat" begin
n = Int(1e2)
lmo = FrankWolfe.LpNormLMO{BigFloat,1}(rand())
x0 = Vector(FrankWolfe.compute_extreme_point(lmo, zeros(n)))