Merge branch 'dicg' into dicg_MOI_test

ZIB-IOL · Nov 7, 2024 · 43e4647 · 43e4647
2 parents ce0a38d + 746269b
commit 43e4647
Show file tree

Hide file tree

Showing 51 changed files with 1,715 additions and 335 deletions.
diff --git a/Project.toml b/Project.toml
@@ -44,6 +44,7 @@ DynamicPolynomials = "7c1d4256-1411-5781-91ec-d7bc3513ac07"
 FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 GLPK = "60bf3e95-4087-53dc-ae20-288a0d20c6a6"
+HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b"
 Hypatia = "b99e6be6-89ff-11e8-14f8-45c827f4f8f2"
 JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 JuMP = "4076af6c-e467-56ae-b986-b466b2749572"

diff --git a/docs/src/advanced.md b/docs/src/advanced.md
@@ -53,7 +53,7 @@ See [Extra-lazification](@ref) for a complete example.
 
 ## Specialized active set for quadratic functions
 
-If the objective function is quadratic, a considerable speedup can be obtained by using the structure `ActiveSetQuadratic`.
+If the objective function is quadratic, a considerable speedup can be obtained by using the structure `ActiveSetQuadraticProductCaching`.
 It relies on the storage of various scalar products to efficiently determine the best (and worst for `blended_pairwise_conditional_gradient`) atom in the active set without the need of computing many scalar products in each iteration.
 The user should provide the Hessian matrix `A` as well as the linear part `b` of the function, such that:
 ```math
@@ -228,6 +228,6 @@ This subspace is the image of the Reynolds operator defined by
 \mathcal{R}(x)=\frac{1}{|G|}\sum_{g\in G}g\cdot x.
 ```
 
-In practice, the type `SymmetricLMO` allows the user to provide the Reynolds operator $\mathcal{R}$ as well as its adjoint $\mathcal{R}^\ast$.
+In practice, the type `SubspaceLMO` allows the user to provide the Reynolds operator $\mathcal{R}$ as well as its adjoint $\mathcal{R}^\ast$.
 The gradient is symmetrised with $\mathcal{R}^\ast$, then passed to the non-symmetric LMO, and the resulting output is symmetrised with $\mathcal{R}$.
 In many cases, the gradient is already symmetric so that `reynolds_adjoint(gradient, lmo) = gradient` is a fast and valid choice.
diff --git a/docs/src/reference/3_backend.md b/docs/src/reference/3_backend.md
@@ -4,7 +4,7 @@
 
 ```@autodocs
 Modules = [FrankWolfe]
-Pages = ["active_set.jl"]
+Pages = ["active_set.jl", "active_set_quadratic.jl", "active_set_quadratic_direct_solve.jl", "active_set_sparsifier.jl"]
 ```
 
 ## Functions and gradients

diff --git a/examples/alternating_projections.jl b/examples/alternating_projections.jl
@@ -0,0 +1,37 @@
+import FrankWolfe
+using LinearAlgebra
+using Random
+
+
+include("../examples/plot_utils.jl")
+
+Random.seed!(100)
+
+n = 500
+lmo = FrankWolfe.ConvexHullOracle([rand(Float64, (n,)) .+ 1.0 for _ in 1:n])
+lmo2 = FrankWolfe.ConvexHullOracle([rand(Float64, (n,)) .- 1.0 for _ in 1:n])
+
+
+trajectories = []
+
+methods = [FrankWolfe.frank_wolfe, FrankWolfe.blended_pairwise_conditional_gradient, FrankWolfe.blended_pairwise_conditional_gradient]
+
+for (i,m) in enumerate(methods)
+    if i == 1
+        x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m)
+    elseif i== 2
+        x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m, reuse_active_set=false, lazy=true)
+    else
+        x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m, reuse_active_set=true, lazy=true)
+    end
+    push!(trajectories, traj_data)
+end
+
+
+
+labels = ["FW", "BPCG" ,"BPCG (reuse)"]
+
+plot_trajectories(trajectories, labels, xscalelog=true)
+
+
+
diff --git a/examples/birkhoff_polytope.jl b/examples/birkhoff_polytope.jl
@@ -54,7 +54,7 @@ FrankWolfe.benchmark_oracles(
     x -> cf(x, xp, normxp2),
     (str, x) -> cgrad!(str, x, xp),
     lmo,
-    FrankWolfe.ActiveSetQuadratic([(1.0, collect(x0))], 2I/n^2, -2xp/n^2); # surprisingly faster and more memory efficient with collect
+    FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, collect(x0))], 2I/n^2, -2xp/n^2); # surprisingly faster and more memory efficient with collect
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2/n^2),
     lazy=true,

diff --git a/examples/birkhoff_polytope_symmetric.jl b/examples/birkhoff_polytope_symmetric.jl
@@ -36,7 +36,7 @@ x0 = FrankWolfe.compute_extreme_point(lmo_nat, randn(n, n))
     x -> cf(x, xp, normxp2),
     (str, x) -> cgrad!(str, x, xp),
     lmo_nat,
-    FrankWolfe.ActiveSetQuadratic([(1.0, x0)], 2I/n^2, -2xp/n^2);
+    FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, x0)], 2I/n^2, -2xp/n^2);
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2/n^2),
     lazy=true,
@@ -48,15 +48,15 @@ x0 = FrankWolfe.compute_extreme_point(lmo_nat, randn(n, n))
 # here the problem is invariant under mirror symmetry around the diagonal and the anti-diagonal
 # each solution of the LMO can then be added to the active set together with its orbit
 # on top of that, the effective dimension of the space is reduced
-# the following function constructs the functions `reduce` and `inflate` needed for SymmetricLMO
-# `reduce` maps a matrix to the invariant vector space
+# the following function constructs the functions `deflate` and `inflate` needed for SubspaceLMO
+# `deflate` maps a matrix to the invariant vector space
 # `inflate` maps a vector in this space back to a matrix
-# using `FrankWolfe.SymmetricArray` is a convenience to avoid reallocating the result of `inflate`
-function build_reduce_inflate(p::Matrix{T}) where {T <: Number}
+# using `FrankWolfe.SubspaceVector` is a convenience to avoid reallocating the result of `inflate`
+function build_deflate_inflate(p::Matrix{T}) where {T <: Number}
     n = size(p, 1)
     @assert n == size(p, 2) # square matrix
-    dimension = floor(Int, (n+1)^2 / 4) # reduced dimension
-    function reduce(A::AbstractMatrix{T}, lmo)
+    dimension = floor(Int, (n+1)^2 / 4) # deflated dimension
+    function deflate(A::AbstractMatrix{T}, lmo)
         vec = Vector{T}(undef, dimension)
         cnt = 0
         @inbounds for i in 1:(n+1)÷2, j in i:n+1-i
@@ -75,9 +75,9 @@ function build_reduce_inflate(p::Matrix{T}) where {T <: Number}
                 end
             end
         end
-        return FrankWolfe.SymmetricArray(A, vec)
+        return FrankWolfe.SubspaceVector(A, vec)
     end
-    function inflate(x::FrankWolfe.SymmetricArray, lmo)
+    function inflate(x::FrankWolfe.SubspaceVector, lmo)
         cnt = 0
         @inbounds for i in 1:(n+1)÷2, j in i:n+1-i
             cnt += 1
@@ -102,22 +102,22 @@ function build_reduce_inflate(p::Matrix{T}) where {T <: Number}
         end
         return x.data
     end
-    return reduce, inflate
+    return deflate, inflate
 end
 
-reduce, inflate = build_reduce_inflate(xpi)
-const rxp = reduce(xpi, nothing)
-@assert dot(rxp, rxp) ≈ normxp2 # should be correct thanks to the factors sqrt(2) and 2 in reduce and inflate
+deflate, inflate = build_deflate_inflate(xpi)
+const rxp = deflate(xpi, nothing)
+@assert dot(rxp, rxp) ≈ normxp2 # should be correct thanks to the factors sqrt(2) and 2 in deflate and inflate
 
-lmo_sym = FrankWolfe.SymmetricLMO(lmo_nat, reduce, inflate)
+lmo_sym = FrankWolfe.SubspaceLMO(lmo_nat, deflate, inflate)
 
-rx0 = FrankWolfe.compute_extreme_point(lmo_sym, reduce(sparse(randn(n, n)), nothing))
+rx0 = FrankWolfe.compute_extreme_point(lmo_sym, deflate(sparse(randn(n, n)), nothing))
 
 @time rx, rv, rprimal, rdual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     x -> cf(x, rxp, normxp2),
     (str, x) -> cgrad!(str, x, rxp),
     lmo_sym,
-    FrankWolfe.ActiveSetQuadratic([(1.0, rx0)], 2I/n^2, -2rxp/n^2);
+    FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, rx0)], 2I/n^2, -2rxp/n^2);
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2/n^2),
     lazy=true,

diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl
@@ -0,0 +1,174 @@
+#=
+This example demonstrates the use of the Blended Pairwise Conditional Gradient algorithm
+with direct solve steps for a quadratic optimization problem over a sparse polytope. 
+
+Note the special structure of f(x) =  norm(x - x0)^2 that we assume here
+
+The example showcases how the algorithm balances between:
+- Pairwise steps for efficient optimization
+- Periodic direct solves for handling the quadratic objective
+- Lazy (approximate) linear minimization steps for improved iteration complexity
+
+It also demonstrates how to set up custom callbacks for tracking algorithm progress.
+=#
+
+using FrankWolfe
+using LinearAlgebra
+using Random
+
+import HiGHS
+import MathOptInterface as MOI
+
+include("../examples/plot_utils.jl")
+
+n = Int(1e4)
+k = 10_000
+
+s = 10
+@info "Seed $s"
+Random.seed!(s)
+
+xpi = rand(n);
+total = sum(xpi);
+
+const xp = xpi ./ total;
+
+f(x) = norm(x - xp)^2
+function grad!(storage, x)
+    @. storage = 2 * (x - xp)
+end
+
+lmo = FrankWolfe.KSparseLMO(5, 1.0)
+
+const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n))
+
+function build_callback(trajectory_arr)
+    return function callback(state, active_set, args...)
+        return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set)))
+    end
+end
+
+
+trajectoryBPCG_standard = []
+@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    copy(x00),
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=true,
+    callback=build_callback(trajectoryBPCG_standard),
+);
+
+# Just projection quadratic
+trajectoryBPCG_quadratic = []
+as_quad = FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp)
+@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=true,
+    callback=build_callback(trajectoryBPCG_quadratic),
+);
+
+as_quad = FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp)
+
+# with quadratic active set
+trajectoryBPCG_quadratic_as = []
+@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=true,
+    callback=build_callback(trajectoryBPCG_quadratic_as),
+);
+
+as_quad_direct = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    [(1.0, copy(x00))],
+    2 * LinearAlgebra.I,
+    -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
+
+# with LP acceleration
+trajectoryBPCG_quadratic_direct = []
+@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad_direct,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=true,
+    callback=build_callback(trajectoryBPCG_quadratic_direct),
+);
+
+as_quad_direct_generic = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    [(1.0, copy(x00))],
+    2 * Diagonal(ones(length(xp))),
+    -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
+
+# with LP acceleration
+trajectoryBPCG_quadratic_direct_generic = []
+@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad_direct_generic,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=true,
+    callback=build_callback(trajectoryBPCG_quadratic_direct_generic),
+);
+
+as_quad_direct_basic_as = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    FrankWolfe.ActiveSet([1.0], [copy(x00)], collect(x00)),
+    2 * LinearAlgebra.I,
+    -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
+
+# with LP acceleration
+trajectoryBPCG_quadratic_noqas = []
+
+@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad_direct_basic_as,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=true,
+    callback=build_callback(trajectoryBPCG_quadratic_noqas),
+);
+
+
+# Update the data and labels for plotting
+data_trajectories = [
+    trajectoryBPCG_standard,
+    trajectoryBPCG_quadratic,
+    trajectoryBPCG_quadratic_as,
+    trajectoryBPCG_quadratic_direct,
+    trajectoryBPCG_quadratic_direct_generic,
+    trajectoryBPCG_quadratic_noqas,
+]
+labels_trajectories = [
+    "BPCG (Standard)",
+    "BPCG (Specific Direct)",
+    "AS_Quad",
+    "Reloaded",
+    "Reloaded_generic",
+    "Reloaded_noqas",
+]
+
+# Plot trajectories
+plot_trajectories(data_trajectories, labels_trajectories, xscalelog=false)