Skip to content

Commit

Permalink
Merge branch 'dicg' into dicg_MOI_test
Browse files Browse the repository at this point in the history
  • Loading branch information
dhendryc authored Nov 7, 2024
2 parents ce0a38d + 746269b commit 43e4647
Show file tree
Hide file tree
Showing 51 changed files with 1,715 additions and 335 deletions.
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ DynamicPolynomials = "7c1d4256-1411-5781-91ec-d7bc3513ac07"
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
GLPK = "60bf3e95-4087-53dc-ae20-288a0d20c6a6"
HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b"
Hypatia = "b99e6be6-89ff-11e8-14f8-45c827f4f8f2"
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
Expand Down
4 changes: 2 additions & 2 deletions docs/src/advanced.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ See [Extra-lazification](@ref) for a complete example.

## Specialized active set for quadratic functions

If the objective function is quadratic, a considerable speedup can be obtained by using the structure `ActiveSetQuadratic`.
If the objective function is quadratic, a considerable speedup can be obtained by using the structure `ActiveSetQuadraticProductCaching`.
It relies on the storage of various scalar products to efficiently determine the best (and worst for `blended_pairwise_conditional_gradient`) atom in the active set without the need of computing many scalar products in each iteration.
The user should provide the Hessian matrix `A` as well as the linear part `b` of the function, such that:
```math
Expand Down Expand Up @@ -228,6 +228,6 @@ This subspace is the image of the Reynolds operator defined by
\mathcal{R}(x)=\frac{1}{|G|}\sum_{g\in G}g\cdot x.
```

In practice, the type `SymmetricLMO` allows the user to provide the Reynolds operator $\mathcal{R}$ as well as its adjoint $\mathcal{R}^\ast$.
In practice, the type `SubspaceLMO` allows the user to provide the Reynolds operator $\mathcal{R}$ as well as its adjoint $\mathcal{R}^\ast$.
The gradient is symmetrised with $\mathcal{R}^\ast$, then passed to the non-symmetric LMO, and the resulting output is symmetrised with $\mathcal{R}$.
In many cases, the gradient is already symmetric so that `reynolds_adjoint(gradient, lmo) = gradient` is a fast and valid choice.
2 changes: 1 addition & 1 deletion docs/src/reference/3_backend.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

```@autodocs
Modules = [FrankWolfe]
Pages = ["active_set.jl"]
Pages = ["active_set.jl", "active_set_quadratic.jl", "active_set_quadratic_direct_solve.jl", "active_set_sparsifier.jl"]
```

## Functions and gradients
Expand Down
37 changes: 37 additions & 0 deletions examples/alternating_projections.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import FrankWolfe
using LinearAlgebra
using Random


include("../examples/plot_utils.jl")

Random.seed!(100)

n = 500
lmo = FrankWolfe.ConvexHullOracle([rand(Float64, (n,)) .+ 1.0 for _ in 1:n])
lmo2 = FrankWolfe.ConvexHullOracle([rand(Float64, (n,)) .- 1.0 for _ in 1:n])


trajectories = []

methods = [FrankWolfe.frank_wolfe, FrankWolfe.blended_pairwise_conditional_gradient, FrankWolfe.blended_pairwise_conditional_gradient]

for (i,m) in enumerate(methods)
if i == 1
x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m)
elseif i== 2
x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m, reuse_active_set=false, lazy=true)
else
x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m, reuse_active_set=true, lazy=true)
end
push!(trajectories, traj_data)
end



labels = ["FW", "BPCG" ,"BPCG (reuse)"]

plot_trajectories(trajectories, labels, xscalelog=true)



2 changes: 1 addition & 1 deletion examples/birkhoff_polytope.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ FrankWolfe.benchmark_oracles(
x -> cf(x, xp, normxp2),
(str, x) -> cgrad!(str, x, xp),
lmo,
FrankWolfe.ActiveSetQuadratic([(1.0, collect(x0))], 2I/n^2, -2xp/n^2); # surprisingly faster and more memory efficient with collect
FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, collect(x0))], 2I/n^2, -2xp/n^2); # surprisingly faster and more memory efficient with collect
max_iteration=k,
line_search=FrankWolfe.Shortstep(2/n^2),
lazy=true,
Expand Down
32 changes: 16 additions & 16 deletions examples/birkhoff_polytope_symmetric.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ x0 = FrankWolfe.compute_extreme_point(lmo_nat, randn(n, n))
x -> cf(x, xp, normxp2),
(str, x) -> cgrad!(str, x, xp),
lmo_nat,
FrankWolfe.ActiveSetQuadratic([(1.0, x0)], 2I/n^2, -2xp/n^2);
FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, x0)], 2I/n^2, -2xp/n^2);
max_iteration=k,
line_search=FrankWolfe.Shortstep(2/n^2),
lazy=true,
Expand All @@ -48,15 +48,15 @@ x0 = FrankWolfe.compute_extreme_point(lmo_nat, randn(n, n))
# here the problem is invariant under mirror symmetry around the diagonal and the anti-diagonal
# each solution of the LMO can then be added to the active set together with its orbit
# on top of that, the effective dimension of the space is reduced
# the following function constructs the functions `reduce` and `inflate` needed for SymmetricLMO
# `reduce` maps a matrix to the invariant vector space
# the following function constructs the functions `deflate` and `inflate` needed for SubspaceLMO
# `deflate` maps a matrix to the invariant vector space
# `inflate` maps a vector in this space back to a matrix
# using `FrankWolfe.SymmetricArray` is a convenience to avoid reallocating the result of `inflate`
function build_reduce_inflate(p::Matrix{T}) where {T <: Number}
# using `FrankWolfe.SubspaceVector` is a convenience to avoid reallocating the result of `inflate`
function build_deflate_inflate(p::Matrix{T}) where {T <: Number}
n = size(p, 1)
@assert n == size(p, 2) # square matrix
dimension = floor(Int, (n+1)^2 / 4) # reduced dimension
function reduce(A::AbstractMatrix{T}, lmo)
dimension = floor(Int, (n+1)^2 / 4) # deflated dimension
function deflate(A::AbstractMatrix{T}, lmo)
vec = Vector{T}(undef, dimension)
cnt = 0
@inbounds for i in 1:(n+1)÷2, j in i:n+1-i
Expand All @@ -75,9 +75,9 @@ function build_reduce_inflate(p::Matrix{T}) where {T <: Number}
end
end
end
return FrankWolfe.SymmetricArray(A, vec)
return FrankWolfe.SubspaceVector(A, vec)
end
function inflate(x::FrankWolfe.SymmetricArray, lmo)
function inflate(x::FrankWolfe.SubspaceVector, lmo)
cnt = 0
@inbounds for i in 1:(n+1)÷2, j in i:n+1-i
cnt += 1
Expand All @@ -102,22 +102,22 @@ function build_reduce_inflate(p::Matrix{T}) where {T <: Number}
end
return x.data
end
return reduce, inflate
return deflate, inflate
end

reduce, inflate = build_reduce_inflate(xpi)
const rxp = reduce(xpi, nothing)
@assert dot(rxp, rxp) normxp2 # should be correct thanks to the factors sqrt(2) and 2 in reduce and inflate
deflate, inflate = build_deflate_inflate(xpi)
const rxp = deflate(xpi, nothing)
@assert dot(rxp, rxp) normxp2 # should be correct thanks to the factors sqrt(2) and 2 in deflate and inflate

lmo_sym = FrankWolfe.SymmetricLMO(lmo_nat, reduce, inflate)
lmo_sym = FrankWolfe.SubspaceLMO(lmo_nat, deflate, inflate)

rx0 = FrankWolfe.compute_extreme_point(lmo_sym, reduce(sparse(randn(n, n)), nothing))
rx0 = FrankWolfe.compute_extreme_point(lmo_sym, deflate(sparse(randn(n, n)), nothing))

@time rx, rv, rprimal, rdual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
x -> cf(x, rxp, normxp2),
(str, x) -> cgrad!(str, x, rxp),
lmo_sym,
FrankWolfe.ActiveSetQuadratic([(1.0, rx0)], 2I/n^2, -2rxp/n^2);
FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, rx0)], 2I/n^2, -2rxp/n^2);
max_iteration=k,
line_search=FrankWolfe.Shortstep(2/n^2),
lazy=true,
Expand Down
174 changes: 174 additions & 0 deletions examples/blended_pairwise_with_direct.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
#=
This example demonstrates the use of the Blended Pairwise Conditional Gradient algorithm
with direct solve steps for a quadratic optimization problem over a sparse polytope.
Note the special structure of f(x) = norm(x - x0)^2 that we assume here
The example showcases how the algorithm balances between:
- Pairwise steps for efficient optimization
- Periodic direct solves for handling the quadratic objective
- Lazy (approximate) linear minimization steps for improved iteration complexity
It also demonstrates how to set up custom callbacks for tracking algorithm progress.
=#

using FrankWolfe
using LinearAlgebra
using Random

import HiGHS
import MathOptInterface as MOI

include("../examples/plot_utils.jl")

n = Int(1e4)
k = 10_000

s = 10
@info "Seed $s"
Random.seed!(s)

xpi = rand(n);
total = sum(xpi);

const xp = xpi ./ total;

f(x) = norm(x - xp)^2
function grad!(storage, x)
@. storage = 2 * (x - xp)
end

lmo = FrankWolfe.KSparseLMO(5, 1.0)

const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n))

function build_callback(trajectory_arr)
return function callback(state, active_set, args...)
return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set)))
end
end


trajectoryBPCG_standard = []
@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
f,
grad!,
lmo,
copy(x00),
max_iteration=k,
line_search=FrankWolfe.Shortstep(2.0),
verbose=true,
callback=build_callback(trajectoryBPCG_standard),
);

# Just projection quadratic
trajectoryBPCG_quadratic = []
as_quad = FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp)
@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
f,
grad!,
lmo,
as_quad,
max_iteration=k,
line_search=FrankWolfe.Shortstep(2.0),
verbose=true,
callback=build_callback(trajectoryBPCG_quadratic),
);

as_quad = FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp)

# with quadratic active set
trajectoryBPCG_quadratic_as = []
@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
f,
grad!,
lmo,
as_quad,
max_iteration=k,
line_search=FrankWolfe.Shortstep(2.0),
verbose=true,
callback=build_callback(trajectoryBPCG_quadratic_as),
);

as_quad_direct = FrankWolfe.ActiveSetQuadraticLinearSolve(
[(1.0, copy(x00))],
2 * LinearAlgebra.I,
-2xp,
MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
)

# with LP acceleration
trajectoryBPCG_quadratic_direct = []
@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
f,
grad!,
lmo,
as_quad_direct,
max_iteration=k,
line_search=FrankWolfe.Shortstep(2.0),
verbose=true,
callback=build_callback(trajectoryBPCG_quadratic_direct),
);

as_quad_direct_generic = FrankWolfe.ActiveSetQuadraticLinearSolve(
[(1.0, copy(x00))],
2 * Diagonal(ones(length(xp))),
-2xp,
MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
)

# with LP acceleration
trajectoryBPCG_quadratic_direct_generic = []
@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
f,
grad!,
lmo,
as_quad_direct_generic,
max_iteration=k,
line_search=FrankWolfe.Shortstep(2.0),
verbose=true,
callback=build_callback(trajectoryBPCG_quadratic_direct_generic),
);

as_quad_direct_basic_as = FrankWolfe.ActiveSetQuadraticLinearSolve(
FrankWolfe.ActiveSet([1.0], [copy(x00)], collect(x00)),
2 * LinearAlgebra.I,
-2xp,
MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
)

# with LP acceleration
trajectoryBPCG_quadratic_noqas = []

@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
f,
grad!,
lmo,
as_quad_direct_basic_as,
max_iteration=k,
line_search=FrankWolfe.Shortstep(2.0),
verbose=true,
callback=build_callback(trajectoryBPCG_quadratic_noqas),
);


# Update the data and labels for plotting
data_trajectories = [
trajectoryBPCG_standard,
trajectoryBPCG_quadratic,
trajectoryBPCG_quadratic_as,
trajectoryBPCG_quadratic_direct,
trajectoryBPCG_quadratic_direct_generic,
trajectoryBPCG_quadratic_noqas,
]
labels_trajectories = [
"BPCG (Standard)",
"BPCG (Specific Direct)",
"AS_Quad",
"Reloaded",
"Reloaded_generic",
"Reloaded_noqas",
]

# Plot trajectories
plot_trajectories(data_trajectories, labels_trajectories, xscalelog=false)
Loading

0 comments on commit 43e4647

Please sign in to comment.