From 8c8cc77c9ff911bcb2b1a7bdb3d36e91650d31e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Tue, 3 Sep 2024 14:36:29 +0200 Subject: [PATCH 01/21] added tullio --- Project.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 270baa783..2d7a962e3 100644 --- a/Project.toml +++ b/Project.toml @@ -53,7 +53,8 @@ Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Polyhedra = "67491407-f73d-577b-9b50-8179a7c68029" ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Tullio = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc" ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" [targets] -test = ["CSV", "Combinatorics", "DataFrames", "DoubleFloats", "DynamicPolynomials", "FiniteDifferences", "ForwardDiff", "GLPK", "JSON", "JuMP", "LaTeXStrings", "MAT", "MultivariatePolynomials", "Plots", "PlotThemes", "Polyhedra", "ReverseDiff", "ZipFile", "Test", "Clp", "Hypatia"] +test = ["CSV", "Combinatorics", "DataFrames", "DoubleFloats", "DynamicPolynomials", "FiniteDifferences", "ForwardDiff", "GLPK", "JSON", "JuMP", "LaTeXStrings", "MAT", "MultivariatePolynomials", "Plots", "PlotThemes", "Polyhedra", "ReverseDiff", "ZipFile", "Test", "Tullio", "Clp", "Hypatia"] From 6779b5a7d7b91e6eb8107c6b87141f6d6c969551 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Mon, 9 Sep 2024 11:31:41 +0200 Subject: [PATCH 02/21] Dicg tracking (#501) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add Combinatorics test (#486) * Secant Method based line search strategy for self-concordant functions (#491) * initial check-in // secant-based line search for self-concordant functions * added secant test and added reset_state to prevent side effects with the gradient and the linesearch workspaces / happened with golden-ratio with it * minor update to example * minor fixes to line search (clean-up) and example * MAT in test deps * remove reversediff * added data * more tests * added doc string * added a larger portfolio example * line search docstring attached * adapted workspace to avoid overwriting the gradient * formatting * remove benchmarktools --------- Co-authored-by: Mathieu Besançon * Update Project.toml (#492) * added tullio * added tullio (#493) * Use the best value for phi. * Secant fix (#495) * added tullio * change order for secant * domain oracle * BPCG - Real dual gap in Post Processing (#498) * Report the real dual gap in the post processing. * Add a note on the non-monotonicity of the dual gap. * Update README.md --------- Co-authored-by: Hendrych Co-authored-by: Mathieu Besançon * Update Project.toml (#499) * Optimal Design Problem Example (#494) * Optimal Design Problem example. * Wrap Secant linesearch in MonotonicGenericStep. * Set the correct Step type flag. * Use corrected Secant step size. Compare number of iterations. Secant should require fewer iterations. * Check only the iterations. 
--------- Co-authored-by: Hendrych * breaking changes for 0.4 (#484) * breaking change on step type (#483) * breaking change on step type * st to steptype_string * step type names * Update Project.toml * adapt log --------- Co-authored-by: dhendryc <92737336+dhendryc@users.noreply.github.com> * added secant workspace to track last gamma as start (#500) * dicg tracking * adapt to 0.4 --------- Co-authored-by: Sebastian Pokutta <23001135+pokutta@users.noreply.github.com> Co-authored-by: Hendrych Co-authored-by: Sébastien Designolle <55443987+sebastiendesignolle@users.noreply.github.com> Co-authored-by: dhendryc <92737336+dhendryc@users.noreply.github.com> --- src/FrankWolfe.jl | 2 +- src/dicg.jl | 43 ++++++++++++++++----------------- src/tracking.jl | 8 ++++++ test/decomposition_invariant.jl | 3 +++ 4 files changed, 33 insertions(+), 23 deletions(-) diff --git a/src/FrankWolfe.jl b/src/FrankWolfe.jl index 27636717c..e39503847 100644 --- a/src/FrankWolfe.jl +++ b/src/FrankWolfe.jl @@ -45,9 +45,9 @@ include("block_coordinate_algorithms.jl") include("alternating_methods.jl") include("blended_pairwise.jl") include("pairwise.jl") +include("dicg.jl") include("tracking.jl") include("callback.jl") -include("dicg.jl") # collecting most common data types etc and precompile # min version req set to 1.5 to prevent stalling of julia 1 diff --git a/src/dicg.jl b/src/dicg.jl index a26ddba77..eacf4e707 100644 --- a/src/dicg.jl +++ b/src/dicg.jl @@ -65,7 +65,7 @@ function decomposition_invariant_conditional_gradient( headers = ("Type", "Iteration", "Primal", "Dual", "Dual Gap", "Time", "It/sec") function format_state(state, args...) rep = ( - st[Symbol(state.tt)], + steptype_string[Symbol(state.step_type)], string(state.t), Float64(state.primal), Float64(state.primal - state.dual_gap), @@ -105,7 +105,7 @@ function decomposition_invariant_conditional_gradient( t = 0 primal = convert(float(eltype(x)), Inf) - tt = regular + step_type = ST_REGULAR time_start = time_ns() d = similar(x) @@ -121,10 +121,10 @@ function decomposition_invariant_conditional_gradient( "MEMORY_MODE: $memory_mode STEPSIZE: $line_search EPSILON: $epsilon MAXITERATION: $max_iteration TYPE: $NumType", ) grad_type = typeof(gradient) - println("GRADIENTTYPE: $grad_type LAZY: $lazy lazy_tolerance: $lazy_tolerance") + println("GRADIENTTYPE: $grad_type LAZY: $lazy lazy_tolerance: $lazy_tolerance") println("LMO: $(typeof(lmo))") if memory_mode isa InplaceEmphasis - @info("In memory_mode memory iterates are written back into x0!") + @info("In memory_mode memory iterates are written back into x0!") end end @@ -166,7 +166,7 @@ function decomposition_invariant_conditional_gradient( end if lazy - d, v, v_index, a, away_index, phi, tt = + d, v, v_index, a, away_index, phi, step_type = lazy_dicg_step( x, gradient, @@ -219,7 +219,7 @@ function decomposition_invariant_conditional_gradient( grad!, lmo, gradient, - tt, + step_type, ) if callback(state) === false break @@ -238,7 +238,7 @@ function decomposition_invariant_conditional_gradient( primal = f(x) dual_gap = fast_dot(x, gradient) - fast_dot(v, gradient) if verbose - tt = last + step_type = ST_LAST tot_time = (time_ns() - time_start) / 1e9 if callback !== nothing state = CallbackState( @@ -255,7 +255,7 @@ function decomposition_invariant_conditional_gradient( grad!, lmo, gradient, - tt, + step_type, ) callback(state) end @@ -300,7 +300,7 @@ function blended_decomposition_invariant_conditional_gradient( headers = ("Type", "Iteration", "Primal", "Dual", "Dual Gap", "Time", "It/sec") 
function format_state(state, args...) rep = ( - st[Symbol(state.tt)], + steptype_string[Symbol(state.step_type)], string(state.t), Float64(state.primal), Float64(state.primal - state.dual_gap), @@ -331,7 +331,7 @@ function blended_decomposition_invariant_conditional_gradient( t = 0 primal = convert(float(eltype(x)), Inf) - tt = regular + step_type = ST_REGULAR time_start = time_ns() d = similar(x) @@ -347,10 +347,10 @@ function blended_decomposition_invariant_conditional_gradient( "MEMORY_MODE: $memory_mode STEPSIZE: $line_search EPSILON: $epsilon MAXITERATION: $max_iteration TYPE: $NumType", ) grad_type = typeof(gradient) - println("GRADIENTTYPE: $grad_type LAZY: $lazy lazy_tolerance: $lazy_tolerance") + println("GRADIENTTYPE: $grad_type LAZY: $lazy lazy_tolerance: $lazy_tolerance") println("LMO: $(typeof(lmo))") if memory_mode isa InplaceEmphasis - @info("In memory_mode memory iterates are written back into x0!") + @info("In memory_mode memory iterates are written back into x0!") end end @@ -401,11 +401,11 @@ function blended_decomposition_invariant_conditional_gradient( phi = dual_gap # in-face step if inface_gap >= phi / lazy_tolerance - tt = pairwise + step_type = ST_PAIRWISE d = muladd_memory_mode(memory_mode, d, a, v) gamma_max = dicg_maximum_step(lmo, d, x) else # global FW step - tt = regular + step_type = ST_REGULAR d = muladd_memory_mode(memory_mode, d, x, v) gamma_max = one(phi) end @@ -437,7 +437,7 @@ function blended_decomposition_invariant_conditional_gradient( grad!, lmo, gradient, - tt, + step_type, ) if callback(state) === false break @@ -456,7 +456,7 @@ function blended_decomposition_invariant_conditional_gradient( primal = f(x) dual_gap = fast_dot(x, gradient) - fast_dot(v, gradient) if verbose - tt = last + step_type = ST_LAST tot_time = (time_ns() - time_start) / 1e9 if callback !== nothing state = CallbackState( @@ -473,7 +473,7 @@ function blended_decomposition_invariant_conditional_gradient( grad!, lmo, gradient, - tt, + step_type, ) callback(state) end @@ -496,8 +496,7 @@ function lazy_dicg_step( ) v_local, v_local_loc, val, a_local, a_local_loc, valM = pre_computed_set_argminmax(pre_computed_set, gradient) - tt = regular - gamma_max = nothing + step_type = ST_REGULAR away_index = nothing fw_index = nothing grad_dot_x = fast_dot(x, gradient) @@ -508,7 +507,7 @@ function lazy_dicg_step( if grad_dot_a_local - grad_dot_lazy_fw_vertex >= phi / lazy_tolerance && grad_dot_a_local - grad_dot_lazy_fw_vertex >= epsilon - tt = lazy + step_type = ST_LAZY v = v_local a = a_local d = muladd_memory_mode(memory_mode, d, a, v) @@ -519,7 +518,7 @@ function lazy_dicg_step( # Do lazy inface_point if grad_dot_a_local - grad_dot_v >= phi / lazy_tolerance && grad_dot_a_local - grad_dot_v >= epsilon - tt = lazy + step_type = ST_LAZY a = a_local away_index = a_local_loc else @@ -538,5 +537,5 @@ function lazy_dicg_step( phi = min(dual_gap, phi / 2.0) end end - return d, v, fw_index, a, away_index, phi, tt + return d, v, fw_index, a, away_index, phi, step_type end diff --git a/src/tracking.jl b/src/tracking.jl index 949806fbf..c8b4d7578 100644 --- a/src/tracking.jl +++ b/src/tracking.jl @@ -62,3 +62,11 @@ is_tracking_lmo(lmo) = false is_tracking_lmo(lmo::TrackingLMO) = true TrackingLMO(lmo) = TrackingLMO(lmo, 0) + +is_decomposition_invariant_oracle(lmo::TrackingLMO) = is_decomposition_invariant_oracle(lmo.lmo) + +function compute_inface_extreme_point(lmo::TrackingLMO, direction, x; lazy, kwargs...) 
+ return compute_inface_extreme_point(lmo.lmo, direction, x; lazy=lazy, kwargs...) +end + +dicg_maximum_step(lmo::TrackingLMO, direction, x) = dicg_maximum_step(lmo.lmo, direction, x) diff --git a/test/decomposition_invariant.jl b/test/decomposition_invariant.jl index 0d6e2a049..f6d852f03 100644 --- a/test/decomposition_invariant.jl +++ b/test/decomposition_invariant.jl @@ -247,5 +247,8 @@ end res_fw = FrankWolfe.frank_wolfe(f, grad!, lmo, FrankWolfe.compute_extreme_point(lmo, randn(n))) res_dicg = FrankWolfe.decomposition_invariant_conditional_gradient(f, grad!, lmo, FrankWolfe.compute_extreme_point(lmo, randn(n))) @test norm(res_fw.x - res_dicg.x) ≤ 1e-4 + lmo_tracking = FrankWolfe.TrackingLMO(lmo) + res_dicg_tracking = FrankWolfe.decomposition_invariant_conditional_gradient(f, grad!, lmo_tracking, FrankWolfe.compute_extreme_point(lmo, randn(n))) + @test norm(res_dicg_tracking.x - res_dicg.x) ≤ 1e-4 end end From 456139a6a5e81199ce0e764d39a0877c86b15e51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Thu, 24 Oct 2024 20:22:49 +0200 Subject: [PATCH 03/21] inverse to linsolve for AS quadratic (#510) --- src/active_set_quadratic.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index b200a8912..8f26b632e 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -36,7 +36,7 @@ function detect_quadratic_function(grad!, x0; test=true) X[:, i] .-= x0 G[:, i] .= storage .- g0 end - A = G * inv(X) + A = G / X b = g0 - A * x0 if test x_test = randn(T, n) From ffc360caf3027de2f334a407890943f664a89696 Mon Sep 17 00:00:00 2001 From: "Jannis H." <44263801+JannisHal@users.noreply.github.com> Date: Thu, 24 Oct 2024 23:19:50 +0200 Subject: [PATCH 04/21] Alternating projection update (#513) * Update and generalize AP * Reuse active set and dual_gap information * Example comparing vanilla FW, BPCG, and BPCG with active set reuse * Bug fix * dist2 - reduce allocations * Format document * Remove obsolete dependency --- examples/alternating_projections.jl | 37 +++++++ src/alternating_methods.jl | 163 ++++++++++++++++------------ test/alternating_methods_tests.jl | 35 +++--- 3 files changed, 149 insertions(+), 86 deletions(-) create mode 100644 examples/alternating_projections.jl diff --git a/examples/alternating_projections.jl b/examples/alternating_projections.jl new file mode 100644 index 000000000..159d204ff --- /dev/null +++ b/examples/alternating_projections.jl @@ -0,0 +1,37 @@ +import FrankWolfe +using LinearAlgebra +using Random + + +include("../examples/plot_utils.jl") + +Random.seed!(100) + +n = 500 +lmo = FrankWolfe.ConvexHullOracle([rand(Float64, (n,)) .+ 1.0 for _ in 1:n]) +lmo2 = FrankWolfe.ConvexHullOracle([rand(Float64, (n,)) .- 1.0 for _ in 1:n]) + + +trajectories = [] + +methods = [FrankWolfe.frank_wolfe, FrankWolfe.blended_pairwise_conditional_gradient, FrankWolfe.blended_pairwise_conditional_gradient] + +for (i,m) in enumerate(methods) + if i == 1 + x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m) + elseif i == 2 + x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m, reuse_active_set=false, lazy=true) + else + x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m, reuse_active_set=true, lazy=true) + end + 
push!(trajectories, traj_data) +end + + + +labels = ["FW", "BPCG", "BPCG (reuse)"] + +plot_trajectories(trajectories, labels, xscalelog=true) + + + diff --git a/src/alternating_methods.jl b/src/alternating_methods.jl index ec1c5b2f5..d07a73649 100644 --- a/src/alternating_methods.jl +++ b/src/alternating_methods.jl @@ -35,12 +35,12 @@ end Alternating Linear Minimization minimizes the objective `f` over the intersection of the feasible domains specified by `lmos`. The tuple `x0` defines the initial points for each domain. -Returns a tuple `(x, v, primal, dual_gap, infeas, traj_data)` with: +Returns a tuple `(x, v, primal, dual_gap, dist2, traj_data)` with: - `x` cartesian product of final iterates - `v` cartesian product of last vertices of the LMOs - `primal` primal value `f(x)` - `dual_gap` final Frank-Wolfe gap -- `infeas` sum of squared, pairwise distances between iterates +- `dist2` half the sum of squared pairwise distances between iterates - `traj_data` vector of trajectory information. """ function alternating_linear_minimization( @@ -49,17 +49,17 @@ function alternating_linear_minimization( grad!, lmos::NTuple{N,LinearMinimizationOracle}, x0::Tuple{Vararg{Any,N}}; - lambda::Union{Float64, Function}=1.0, + lambda::Union{Float64,Function}=1.0, verbose=false, trajectory=false, callback=nothing, max_iteration=10000, - print_iter = max_iteration / 10, + print_iter=max_iteration / 10, memory_mode=InplaceEmphasis(), line_search::LS=Adaptive(), epsilon=1e-7, kwargs..., -) where {N, LS<:Union{LineSearchMethod,NTuple{N,LineSearchMethod}}} +) where {N,LS<:Union{LineSearchMethod,NTuple{N,LineSearchMethod}}} x0_bc = BlockVector([x0[i] for i in 1:N], [size(x0[i]) for i in 1:N], sum(length, x0)) gradf = similar(x0_bc) @@ -74,21 +74,14 @@ function alternating_linear_minimization( for i in 1:N grad!(gradf.blocks[i], x.blocks[i]) end - t = [2.0 * (N * b - sum(x.blocks)) for b in x.blocks] + t = [N * b - sum(x.blocks) for b in x.blocks] return storage.blocks = λ[] * gradf.blocks + t end end - function dist2(x::BlockVector) - s = 0 - for i=1:N - for j=1:i-1 - diff = x.blocks[i] - x.blocks[j] - s += fast_dot(diff, diff) - end - end - return s - end + dist2(x::BlockVector) = + 0.5 * (N - 1) * sum(fast_dot(x.blocks[i], x.blocks[i]) for i in 1:N) - + sum(fast_dot(x.blocks[i], x.blocks[j]) for i in 1:N for j in 1:i-1) function build_objective() λ = Ref(λ0) @@ -122,8 +115,11 @@ function alternating_linear_minimization( num_type = eltype(x0[1]) grad_type = eltype(gradf.blocks[1]) - line_search_type = line_search isa Tuple ? [typeof(a) for a in line_search] : typeof(line_search) - println("MEMORY_MODE: $memory_mode STEPSIZE: $line_search_type EPSILON: $epsilon MAXITERATION: $max_iteration") + line_search_type = + line_search isa Tuple ? [typeof(a) for a in line_search] : typeof(line_search) + println( + "MEMORY_MODE: $memory_mode STEPSIZE: $line_search_type EPSILON: $epsilon MAXITERATION: $max_iteration", + ) println("TYPE: $num_type GRADIENTTYPE: $grad_type") println("LAMBDA: $lambda") @@ -177,7 +173,7 @@ function alternating_linear_minimization( end if lambda isa Function - callback = function (state,args...) + callback = function (state, args...) 
state.f.λ[] = lambda(state) state.grad!.λ[] = state.f.λ[] @@ -211,33 +207,15 @@ function alternating_linear_minimization( end -function ProjectionFW(y, lmo; max_iter=10000, eps=1e-3) - f(x) = sum(abs2, x - y) - grad!(storage, x) = storage .= 2 * (x - y) - - x0 = FrankWolfe.compute_extreme_point(lmo, y) - x_opt, _ = FrankWolfe.frank_wolfe( - f, - grad!, - lmo, - x0, - epsilon=eps, - max_iteration=max_iter, - trajectory=true, - line_search=FrankWolfe.Adaptive(verbose=false, relaxed_smoothness=false), - ) - return x_opt -end - """ alternating_projections(lmos::NTuple{N,LinearMinimizationOracle}, x0; ...) where {N} Computes a point in the intersection of feasible domains specified by `lmos`. -Returns a tuple `(x, v, dual_gap, infeas, traj_data)` with: +Returns a tuple `(x, v, dual_gap, dist2, traj_data)` with: - `x` cartesian product of final iterates - `v` cartesian product of last vertices of the LMOs - `dual_gap` final Frank-Wolfe gap -- `infeas` sum of squared, pairwise distances between iterates +- `dist2` half the sum of squared pairwise distances between iterates - `traj_data` vector of trajectory information. """ function alternating_projections( @@ -252,6 +230,10 @@ function alternating_projections( callback=nothing, traj_data=[], timeout=Inf, + proj_method=frank_wolfe, + inner_epsilon::Function=t -> 1 / (t^2 + 1), + reuse_active_set=false, + kwargs..., ) where {N} return alternating_projections( ProductLMO(lmos), @@ -265,6 +247,10 @@ function alternating_projections( callback, traj_data, timeout, + proj_method, + inner_epsilon, + reuse_active_set, + kwargs..., ) end @@ -281,17 +267,21 @@ function alternating_projections( callback=nothing, traj_data=[], timeout=Inf, + proj_method=frank_wolfe, + inner_epsilon::Function=t -> 1 / (t^2 + 1), + reuse_active_set=false, + kwargs..., ) where {N} # header and format string for output of the algorithm - headers = ["Type", "Iteration", "Dual Gap", "Infeas", "Time", "It/sec"] + headers = ["Type", "Iteration", "Dual Gap", "dist2", "Time", "It/sec"] format_string = "%6s %13s %14e %14e %14e %14e\n" - function format_state(state, infeas) + function format_state(state, primal) rep = ( steptype_string[Symbol(state.step_type)], string(state.t), Float64(state.dual_gap), - Float64(infeas), + Float64(primal), state.time, state.t / state.time, ) @@ -300,27 +290,61 @@ function alternating_projections( t = 0 dual_gap = Inf - x = fill(x0, N) - v = similar(x) + dual_gaps = fill(Inf, N) + x = BlockVector(compute_extreme_point.(lmo.lmos, fill(x0, N))) step_type = ST_REGULAR gradient = similar(x) - ndim = ndims(x) - infeasibility(x) = sum( - fast_dot( - selectdim(x, ndim, i) - selectdim(x, ndim, j), - selectdim(x, ndim, i) - selectdim(x, ndim, j), - ) for i in 1:N for j in 1:i-1 - ) + if reuse_active_set + if proj_method ∉ + [away_frank_wolfe, blended_pairwise_conditional_gradient, pairwise_frank_wolfe] + error("The selected FW method does not support active set reuse.") + end + active_sets = [ActiveSet([(1.0, x.blocks[i])]) for i in 1:N] + end - partial_infeasibility(x) = - sum(fast_dot(x[mod(i - 2, N)+1] - x[i], x[mod(i - 2, N)+1] - x[i]) for i in 1:N) + dist2(x::BlockVector) = + 0.5 * (N - 1) * sum(fast_dot(x.blocks[i], x.blocks[i]) for i in 1:N) - + sum(fast_dot(x.blocks[i], x.blocks[j]) for i in 1:N for j in 1:i-1) function grad!(storage, x) - @. 
storage = [2 * (x[i] - x[mod(i - 2, N)+1]) for i in 1:N] + return storage.blocks = [2.0 * (N * b - sum(x.blocks)) for b in x.blocks] end - projection_step(x, i, t) = ProjectionFW(x, lmo.lmos[i]; eps=1 / (t^2 + 1)) + function projection_step(i, t) + xii = x.blocks[mod(i - 2, N)+1] # iterate in previous block + f(y) = sum(abs2, y - xii) + function grad_proj!(storage, y) + return storage .= 2 * (y - xii) + end + + if reuse_active_set + + results = proj_method( + f, + grad_proj!, + lmo.lmos[i], + active_sets[i]; + epsilon=inner_epsilon(t), + max_iteration=10000, + line_search=Adaptive(), + kwargs..., + ) + active_sets[i] = results[:active_set] + else + results = proj_method( + f, + grad_proj!, + lmo.lmos[i], + x.blocks[i]; + epsilon=inner_epsilon(t), + max_iteration=10000, + line_search=Adaptive(), + kwargs..., + ) + end + return results[:x], results[:dual_gap] + end if trajectory @@ -372,19 +396,18 @@ function alternating_projections( # Projection step: for i in 1:N # project the previous iterate on the i-th feasible region - x[i] = projection_step(x[mod(i - 2, N)+1], i, t) + x.blocks[i], dual_gaps[i] = projection_step(i, t) end + dual_gap = sum(dual_gaps) + # Update gradients grad!(gradient, x) - # Update dual gaps - v = compute_extreme_point.(lmo.lmos, gradient) - dual_gap = fast_dot(x - v, gradient) # go easy on the memory - only compute if really needed if ((mod(t, print_iter) == 0 && verbose) || callback !== nothing) - infeas = infeasibility(x) + primal = dist2(x) end first_iter = false @@ -393,12 +416,12 @@ function alternating_projections( if callback !== nothing state = CallbackState( t, - infeas, - infeas - dual_gap, + primal, + primal - dual_gap, dual_gap, tot_time, x, - v, + nothing, nothing, nothing, nothing, @@ -408,7 +431,7 @@ function alternating_projections( step_type, ) # @show state - if callback(state, infeas) === false + if callback(state, primal) === false break end end @@ -419,9 +442,9 @@ function alternating_projections( # this is important as some variants do not recompute f(x) and the dual_gap regularly but only when reporting # hence the final computation. 
step_type = ST_LAST - infeas = infeasibility(x) + primal = dist2(x) grad!(gradient, x) - v = compute_extreme_point.(lmo.lmos, gradient) + v = compute_extreme_point(lmo, gradient) dual_gap = fast_dot(x - v, gradient) tot_time = (time_ns() - time_start) / 1.0e9 @@ -429,8 +452,8 @@ function alternating_projections( if callback !== nothing state = CallbackState( t, - infeas, - infeas - dual_gap, + primal, + primal - dual_gap, dual_gap, tot_time, x, @@ -443,9 +466,9 @@ function alternating_projections( gradient, step_type, ) - callback(state, infeas) + callback(state, primal) end - return x, v, dual_gap, infeas, traj_data + return x, v, dual_gap, primal, traj_data end diff --git a/test/alternating_methods_tests.jl b/test/alternating_methods_tests.jl index 904de49d8..21deecbe4 100644 --- a/test/alternating_methods_tests.jl +++ b/test/alternating_methods_tests.jl @@ -27,7 +27,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=1.0, + lambda=0.5, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -40,7 +40,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=1/3, + lambda=1/6, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -53,7 +53,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=1/9, + lambda=1/18, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -66,7 +66,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=3.0, + lambda=1.5, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -134,6 +134,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) ones(n), line_search=FrankWolfe.Agnostic(), momentum=0.9, + lambda=0.5 ) @test abs(x.blocks[1][1] - 0.5 / n) < 1e-3 @@ -160,6 +161,7 @@ end ones(n), line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), update_order=order, + lambda=0.5, ) @test abs(x.blocks[1][1] - 0.5 / n) < 1e-5 @test abs(x.blocks[2][1] - 1 / n) < 1e-5 @@ -181,6 +183,7 @@ end grad!, (lmo2, lmo_prob), ones(n), + lambda=0.5 ) @test abs(x.blocks[1][1] - 0.5 / n) < 1e-6 @@ -244,8 +247,8 @@ end grad!, (lmo_nb, lmo_prob), ones(n), - lambda=3.0, - line_search=(FrankWolfe.Shortstep(8.0), FrankWolfe.Adaptive()), # L-smooth in coordinates for L = 2+2*λ + lambda=1.5, + line_search=(FrankWolfe.Shortstep(4.0), FrankWolfe.Adaptive()), # L-smooth in coordinates for L = 1+2*λ update_step=(FrankWolfe.BPCGStep(), FrankWolfe.FrankWolfeStep()), ) @@ -257,31 +260,31 @@ end x, _, _, _, _ = FrankWolfe.alternating_projections((lmo1, lmo_prob), rand(n), verbose=true) - @test abs(x[1][1]) < 1e-6 - @test abs(x[2][1] - 1 / n) < 1e-6 + @test abs(x.blocks[1][1]) < 1e-6 + @test abs(x.blocks[2][1] - 1 / n) < 1e-6 x, _, _, _, _ = FrankWolfe.alternating_projections((lmo3, lmo_prob), rand(n)) - @test abs(x[1][1] - 1) < 1e-6 - @test abs(x[2][1] - 1 / n) < 1e-6 + @test abs(x.blocks[1][1] - 1) < 1e-6 + @test abs(x.blocks[2][1] - 1 / n) < 1e-6 x, _, _, infeas, _ = FrankWolfe.alternating_projections((lmo1, lmo2), rand(n)) - @test abs(x[1][1]) < 1e-6 - @test abs(x[2][1]) < 1e-6 + @test abs(x.blocks[1][1]) < 1e-6 + @test abs(x.blocks[2][1]) < 1e-6 @test infeas < 1e-6 x, _, _, infeas, _ = FrankWolfe.alternating_projections((lmo2, lmo3), rand(n)) - @test abs(x[1][1] - 1) < 1e-4 - @test abs(x[2][1] - 1) < 1e-4 + @test abs(x.blocks[1][1] - 1) < 1e-4 + @test abs(x.blocks[2][1] - 1) < 1e-4 @test infeas < 1e-6 x, _, _, 
infeas, traj_data = FrankWolfe.alternating_projections((lmo1, lmo3), rand(n), trajectory=true) - @test abs(x[1][1]) < 1e-6 - @test abs(x[2][1] - 1) < 1e-6 + @test abs(x.blocks[1][1]) < 1e-6 + @test abs(x.blocks[2][1] - 1) < 1e-6 @test traj_data != [] @test length(traj_data[1]) == 5 @test length(traj_data) >= 2 From 73585af8bdb92e84d1c5afc554f31da3168ec302 Mon Sep 17 00:00:00 2001 From: "Jannis H." <44263801+JannisHal@users.noreply.github.com> Date: Thu, 24 Oct 2024 23:20:20 +0200 Subject: [PATCH 05/21] Update Block-Coordinate FW (#512) * BPCG step fixes * Update Dual(Progress/Gap)Order * Clean up * Remove bug in test * Immutable DualGapOrder --- src/block_coordinate_algorithms.jl | 77 +++++++++++++++++++++--------- test/alternating_methods_tests.jl | 2 +- 2 files changed, 55 insertions(+), 24 deletions(-) diff --git a/src/block_coordinate_algorithms.jl b/src/block_coordinate_algorithms.jl index 4a1fc7e6a..0d034a7d3 100644 --- a/src/block_coordinate_algorithms.jl +++ b/src/block_coordinate_algorithms.jl @@ -41,20 +41,29 @@ end StochasticUpdate() = StochasticUpdate(-1) -mutable struct DualGapOrder <: BlockCoordinateUpdateOrder +""" +The dual gap order initiates one round of `limit` many updates. +The corresponding blocks are sampled with probabilities proportional to their respective dual gaps. +""" +struct DualGapOrder <: BlockCoordinateUpdateOrder limit::Int end DualGapOrder() = DualGapOrder(-1) +""" +The dual progress order initiates one round of `limit` many updates. +The corresponding blocks are sampled with probabilities proportional to their respective dual progress. +""" mutable struct DualProgressOrder <: BlockCoordinateUpdateOrder limit::Int previous_dual_gaps::Vector{Float64} dual_progress::Vector{Float64} + last_indices::Vector{Int} end -DualProgressOrder() = DualProgressOrder(-1, [], []) -DualProgressOrder(i::Int64) = DualProgressOrder(i, [], []) +DualProgressOrder() = DualProgressOrder(-1, [], [], []) +DualProgressOrder(i::Int64) = DualProgressOrder(i, [], [], []) """ The Lazy update order is discussed in "Flexible block-iterative @@ -122,6 +131,19 @@ function select_update_indices(u::StochasticUpdate, s::CallbackState, _) return [[rand(1:l)] for i in 1:u.limit] end +function sample_without_replacement(n::Int, weights::Vector{Float64}) + weights = weights ./ sum(weights) + cumsum_weights = cumsum(weights) + indices = zeros(Int, n) + for i in 1:n + indices[i] = findfirst(cumsum_weights .> rand()) + weights[indices[i]] = 0 + weights = weights ./ sum(weights) + cumsum_weights = cumsum(weights) + end + return indices +end + function select_update_indices(u::DualProgressOrder, s::CallbackState, dual_gaps) l = length(s.lmo.lmos) @assert u.limit <= l @assert u.limit > 0 || u.limit == -1 - # In the first iteration update every block so we get finite dual progress - if s.t < 2 - u.previous_dual_gaps = copy(dual_gaps) - return [[i] for i=1:l] - end - - if s.t == 1 - u.dual_progress = dual_gaps - u.previous_dual_gaps + # In the first two iterations update every block so we get finite dual progress + if s.t < 2 + u.dual_progress = ones(l) + indices = [[i for i=1:l]] else - diff = dual_gaps - u.previous_dual_gaps - u.dual_progress = [d == 0 ? u.dual_progress[i] : d for (i, d) in enumerate(diff)] + # Update dual progress only on updated blocks + for i in u.last_indices + d = u.previous_dual_gaps[i] - dual_gaps[i] + if d < 0 + u.dual_progress[i] = 0 + else + u.dual_progress[i] = d + end + end + n = u.limit == -1 ? l : u.limit + + # If fewer than n blocks have non-zero dual progress, update all of them + if sum(u.dual_progress .!= 0) < n + indices = [[i for i=1:l]] + else + indices = [sample_without_replacement(n, u.dual_progress)] + end end u.previous_dual_gaps = copy(dual_gaps) - - n = u.limit == -1 ? l : u.limit - return [[findfirst(cumsum(u.dual_progress/sum(u.dual_progress)) .> rand())] for _=1:n] + u.last_indices = vcat(indices...) + return indices end function select_update_indices(u::DualGapOrder, s::CallbackState, dual_gaps) @@ -156,11 +188,11 @@ function select_update_indices(u::DualGapOrder, s::CallbackState, dual_gaps) # In the first iteration update every block so we get finite dual gaps if s.t < 1 - return [[i] for i=1:l] + return [[i for i=1:l]] end n = u.limit == -1 ? l : u.limit - return [[findfirst(cumsum(dual_gaps/sum(dual_gaps)) .> rand())] for _=1:n] + return [sample_without_replacement(n, dual_gaps)] end function select_update_indices(update::LazyUpdate, s::CallbackState, dual_gaps) @@ -237,19 +269,18 @@ mutable struct BPCGStep <: UpdateStep phi::Float64 end -function Base.copy(::FrankWolfeStep) - return FrankWolfeStep() -end +Base.copy(::FrankWolfeStep) = FrankWolfeStep() function Base.copy(obj::BPCGStep) if obj.active_set === nothing - return BPCGStep(false, nothing, obj.renorm_interval, obj.lazy_tolerance, Inf) + return BPCGStep(obj.lazy, nothing, obj.renorm_interval, obj.lazy_tolerance, obj.phi) else return BPCGStep(obj.lazy, copy(obj.active_set), obj.renorm_interval, obj.lazy_tolerance, obj.phi) end end BPCGStep() = BPCGStep(false, nothing, 1000, 2.0, Inf) +BPCGStep(lazy::Bool) = BPCGStep(lazy, nothing, 1000, 2.0, Inf) function update_iterate( ::FrankWolfeStep, @@ -324,7 +355,7 @@ function update_iterate( # minor modification from original paper for improved sparsity # (proof follows with minor modification when estimating the step) - if local_gap > s.phi / s.lazy_tolerance + if local_gap > max(s.phi / s.lazy_tolerance, 0) # Robust condition to not drop the zero-vector if the dual_gap is negative by inaccuracy d = muladd_memory_mode(memory_mode, d, a, v_local) vertex_taken = v_local gamma_max = a_lambda diff --git a/test/alternating_methods_tests.jl b/test/alternating_methods_tests.jl index 21deecbe4..7bf9487a6 100644 --- a/test/alternating_methods_tests.jl +++ b/test/alternating_methods_tests.jl @@ -223,7 +223,7 @@ end grad!, (lmo1, lmo2), ones(n), - update_step=FrankWolfe.BPCGStep(true, nothing, 1000, false, Inf), + update_step=FrankWolfe.BPCGStep(true), ) @test abs(x.blocks[1][1]) < 1e-6 From 1ac1797d59ac9f5df5d882ff811be5f3f81bbf90 Mon Sep 17 00:00:00 2001 From: Sebastian Pokutta <23001135+pokutta@users.noreply.github.com> Date: Fri, 25 Oct 2024 11:12:54 +0200 Subject: [PATCH 06/21] BPCG with direct solve extension (#507) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * added direct solve feature BPCG via LP solver * adjusted for arbitrary LP solver * fixed deps * cleanup and comment * minor * added reporting of direct solve step * chose HiGHS as standard solver * added sparsification * added sparsification code * cleanup * minor cleanup * minor * added generalized direct_solve * clean up, docu, additional direct_solve * docstrings fixed? 
* sparsifier active set (#508) * sparsifier active set * fix typo * added sparsifying tests * generic tolerance * remove sparsification * format * HiGHS dep * Quadratic solve structure (#511) * sparsifier active set * start working on LP AS * first working quadratic * remove quadratic LP from current * cleanup * HiGHS in test deps * working reworked LP quadratic * working version generic quadratic * slow version generic quadratic * faster term manipulation * copy sufficient * remove comment * added test for quadratic * minor * simplify example * clean up code, verify error with ASQuad * Add update_weights! to fix direct solve with active_set_quadratic * remove direct solve from BPCG * rng changed --------- Co-authored-by: Sébastien Designolle * update example * format * clean up example * fix callback --------- Co-authored-by: Mathieu Besançon Co-authored-by: Sébastien Designolle --- Project.toml | 3 +- examples/blended_pairwise_with_direct.jl | 174 ++++++++ ...wise_with_direct_non-standard-quadratic.jl | 146 +++++++ examples/blended_pairwise_with_sparsify.jl | 100 +++++ examples/simple_stateless.jl | 2 +- src/FrankWolfe.jl | 2 + src/active_set.jl | 4 + src/active_set_quadratic.jl | 18 +- src/active_set_quadratic_direct_solve.jl | 383 ++++++++++++++++++ src/active_set_sparsifier.jl | 96 +++++ src/blended_pairwise.jl | 2 - src/moi_oracle.jl | 2 +- src/utils.jl | 71 ++++ test/quadratic_lp_active_set.jl | 113 ++++++ test/runtests.jl | 14 + test/sparsifying_activeset.jl | 85 ++++ 16 files changed, 1206 insertions(+), 9 deletions(-) create mode 100644 examples/blended_pairwise_with_direct.jl create mode 100644 examples/blended_pairwise_with_direct_non-standard-quadratic.jl create mode 100644 examples/blended_pairwise_with_sparsify.jl create mode 100644 src/active_set_quadratic_direct_solve.jl create mode 100644 src/active_set_sparsifier.jl create mode 100644 test/quadratic_lp_active_set.jl create mode 100644 test/sparsifying_activeset.jl diff --git a/Project.toml b/Project.toml index e56223008..ca298fa1a 100644 --- a/Project.toml +++ b/Project.toml @@ -43,6 +43,7 @@ DynamicPolynomials = "7c1d4256-1411-5781-91ec-d7bc3513ac07" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" GLPK = "60bf3e95-4087-53dc-ae20-288a0d20c6a6" +HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b" Hypatia = "b99e6be6-89ff-11e8-14f8-45c827f4f8f2" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" JuMP = "4076af6c-e467-56ae-b986-b466b2749572" @@ -58,4 +59,4 @@ Tullio = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc" ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" [targets] -test = ["CSV", "Combinatorics", "DataFrames", "Distributions", "DoubleFloats", "DynamicPolynomials", "FiniteDifferences", "ForwardDiff", "GLPK", "JSON", "JuMP", "LaTeXStrings", "MAT", "MultivariatePolynomials", "Plots", "PlotThemes", "Polyhedra", "ReverseDiff", "ZipFile", "Test", "Tullio", "Clp", "Hypatia"] +test = ["CSV", "Combinatorics", "DataFrames", "Distributions", "DoubleFloats", "DynamicPolynomials", "FiniteDifferences", "ForwardDiff", "GLPK", "HiGHS", "JSON", "JuMP", "LaTeXStrings", "MAT", "MultivariatePolynomials", "Plots", "PlotThemes", "Polyhedra", "ReverseDiff", "ZipFile", "Test", "Tullio", "Clp", "Hypatia"] diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl new file mode 100644 index 000000000..9a30bde76 --- /dev/null +++ b/examples/blended_pairwise_with_direct.jl @@ -0,0 +1,174 @@ +#= +This example demonstrates the use of the Blended Pairwise 
Conditional Gradient algorithm +with direct solve steps for a quadratic optimization problem over a sparse polytope. + +Note the special structure of f(x) = norm(x - x0)^2 that we assume here + +The example showcases how the algorithm balances between: +- Pairwise steps for efficient optimization +- Periodic direct solves for handling the quadratic objective +- Lazy (approximate) linear minimization steps for improved iteration complexity + +It also demonstrates how to set up custom callbacks for tracking algorithm progress. +=# + +using FrankWolfe +using LinearAlgebra +using Random + +import HiGHS +import MathOptInterface as MOI + +include("../examples/plot_utils.jl") + +n = Int(1e4) +k = 10_000 + +s = 10 +@info "Seed $s" +Random.seed!(s) + +xpi = rand(n); +total = sum(xpi); + +const xp = xpi ./ total; + +f(x) = norm(x - xp)^2 +function grad!(storage, x) + @. storage = 2 * (x - xp) +end + +lmo = FrankWolfe.KSparseLMO(5, 1.0) + +const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) + return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set))) + end +end + + +trajectoryBPCG_standard = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_standard), +); + +# Just projection quadratic +trajectoryBPCG_quadratic = [] +as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp) +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic), +); + +as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp) + +# with quadratic active set +trajectoryBPCG_quadratic_as = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_as), +); + +as_quad_direct = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + 2 * LinearAlgebra.I, + -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +# with LP acceleration +trajectoryBPCG_quadratic_direct = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_direct), +); + +as_quad_direct_generic = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + 2 * Diagonal(ones(length(xp))), + -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +# with LP acceleration +trajectoryBPCG_quadratic_direct_generic = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct_generic, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_direct_generic), +); + +as_quad_direct_basic_as = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([1.0], [copy(x00)], collect(x00)), + 2 * LinearAlgebra.I, 
+ -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +# with LP acceleration +trajectoryBPCG_quadratic_noqas = [] + +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct_basic_as, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_noqas), +); + + +# Update the data and labels for plotting +data_trajectories = [ + trajectoryBPCG_standard, + trajectoryBPCG_quadratic, + trajectoryBPCG_quadratic_as, + trajectoryBPCG_quadratic_direct, + trajectoryBPCG_quadratic_direct_generic, + trajectoryBPCG_quadratic_noqas, +] +labels_trajectories = [ + "BPCG (Standard)", + "BPCG (Specific Direct)", + "AS_Quad", + "Reloaded", + "Reloaded_generic", + "Reloaded_noqas", +] + +# Plot trajectories +plot_trajectories(data_trajectories, labels_trajectories, xscalelog=false) diff --git a/examples/blended_pairwise_with_direct_non-standard-quadratic.jl b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl new file mode 100644 index 000000000..ab934edd2 --- /dev/null +++ b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl @@ -0,0 +1,146 @@ +#= +This example demonstrates the use of the Blended Pairwise Conditional Gradient algorithm +with direct solve steps for a quadratic optimization problem over a sparse polytope which is not standard quadratic. + +The example showcases how the algorithm balances between: +- Pairwise steps for efficient optimization +- Periodic direct solves for handling the quadratic objective +- Lazy (approximate) linear minimization steps for improved iteration complexity + +It also demonstrates how to set up custom callbacks for tracking algorithm progress. +=# + +using FrankWolfe +using LinearAlgebra +using Random + +import HiGHS +import MathOptInterface as MOI + +include("../examples/plot_utils.jl") + +n = Int(1e2) +k = 10000 + +# s = rand(1:100) +s = 10 +@info "Seed $s" +Random.seed!(s) + +A = let + A = randn(n, n) + A' * A +end +@assert isposdef(A) == true + +const y = Random.rand(Bool, n) * 0.6 .+ 0.3 + +function f(x) + d = x - y + return dot(d, A, d) +end + +function grad!(storage, x) + mul!(storage, A, x) + return mul!(storage, A, y, -2, 2) +end + + +# lmo = FrankWolfe.KSparseLMO(5, 1000.0) + +## other LMOs to try +# lmo_big = FrankWolfe.KSparseLMO(100, big"1.0") +# lmo = FrankWolfe.LpNormLMO{Float64,5}(100.0) +# lmo = FrankWolfe.ProbabilitySimplexOracle(100.0); +lmo = FrankWolfe.UnitSimplexOracle(10000.0); + +x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) 
+ return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set))) + end +end + + +trajectoryBPCG_standard = [] +callback = build_callback(trajectoryBPCG_standard) + +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + line_search=FrankWolfe.Adaptive(), + print_iter=k / 10, + memory_mode=FrankWolfe.InplaceEmphasis(), + verbose=true, + trajectory=true, + callback=callback, +); + +active_set_quadratic_automatic = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=100, scaling_factor=1.2, max_interval=100), +) +trajectoryBPCG_quadratic_automatic = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic, + max_iteration=k, + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_automatic), +); + +active_set_quadratic_automatic2 = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=2), +) +trajectoryBPCG_quadratic_automatic2 = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic2, + max_iteration=k, + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_automatic2), +); + + +active_set_quadratic_automatic_standard = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([(1.0, copy(x00))]), + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=2), +) +trajectoryBPCG_quadratic_automatic_standard = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic_standard, + max_iteration=k, + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_automatic_standard), +); + + +dataSparsity = [ + trajectoryBPCG_standard, + trajectoryBPCG_quadratic_automatic, + trajectoryBPCG_quadratic_automatic_standard, +] +labelSparsity = ["BPCG (Standard)", "AS_Quad", "AS_Standard"] + + +# Plot trajectories +plot_trajectories(dataSparsity, labelSparsity, xscalelog=false) diff --git a/examples/blended_pairwise_with_sparsify.jl b/examples/blended_pairwise_with_sparsify.jl new file mode 100644 index 000000000..6d42b7de4 --- /dev/null +++ b/examples/blended_pairwise_with_sparsify.jl @@ -0,0 +1,100 @@ +#= +This example demonstrates the use of the Blended Pairwise Conditional Gradient algorithm +with periodic active-set sparsification for a quadratic optimization problem over a sparse polytope. + +The example showcases how the algorithm balances between: +- Pairwise steps for efficient optimization +- Periodic LP solves to sparsify the active set +- Lazy (approximate) linear minimization steps for improved iteration complexity + +It also demonstrates how to set up custom callbacks for tracking algorithm progress. 
+=# + +using FrankWolfe +using LinearAlgebra +using Random + +import HiGHS + +include("../examples/plot_utils.jl") + +n = Int(1e4) +k = 10000 + +s = 10 +@info "Seed $s" +Random.seed!(s) + +xpi = rand(n); +total = sum(xpi); + +# here the optimal solution lies in the interior; if you want an optimal solution on a face and not the interior, use: +# const xp = xpi; + +const xp = xpi ./ total; + +f(x) = norm(x - xp)^2 +function grad!(storage, x) + @. storage = 2 * (x - xp) +end + +# lmo = FrankWolfe.KSparseLMO(2, 1.0) + +## other LMOs to try +# lmo_big = FrankWolfe.KSparseLMO(100, big"1.0") +lmo = FrankWolfe.LpNormLMO{Float64,5}(1.0) +# lmo = FrankWolfe.ProbabilitySimplexOracle(1.0); +# lmo = FrankWolfe.UnitSimplexOracle(1.0); + +x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) + return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set))) + end +end + +FrankWolfe.benchmark_oracles(f, grad!, () -> randn(n), lmo; k=100) + +trajectoryBPCG_standard = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + print_iter=k / 10, + memory_mode=FrankWolfe.InplaceEmphasis(), + verbose=true, + trajectory=true, + callback=build_callback(trajectoryBPCG_standard), +); + +trajectoryBPCG_as_sparse = [] +active_set_sparse = FrankWolfe.ActiveSetSparsifier( + FrankWolfe.ActiveSet([1.0], [x00], similar(x00)), + HiGHS.Optimizer(), +) + +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_sparse, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + print_iter=k / 10, + memory_mode=FrankWolfe.InplaceEmphasis(), + verbose=true, + trajectory=true, + callback=build_callback(trajectoryBPCG_as_sparse), +); + +# Reduction primal/dual error vs. sparsity of solution + +dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_as_sparse] +labelSparsity = ["BPCG (Standard)", "BPCG (Sparsify)"] + +# Plot sparsity +plot_sparsity(dataSparsity, labelSparsity, legend_position=:topright) diff --git a/examples/simple_stateless.jl b/examples/simple_stateless.jl index 6a550a963..6bf230e26 100644 --- a/examples/simple_stateless.jl +++ b/examples/simple_stateless.jl @@ -71,7 +71,7 @@ plot_trajectories([trajectory_simple[1:end], trajectory_restart[1:end]], ["simpl # simple step iterations about 33% faster -@test line_search.factor <= 44 +@test line_search.factor <= 51 x, v, primal, dual_gap, trajectory_restart_highpres = FrankWolfe.frank_wolfe( f, diff --git a/src/FrankWolfe.jl b/src/FrankWolfe.jl index e6d29c3cc..cc2fef145 100644 --- a/src/FrankWolfe.jl +++ b/src/FrankWolfe.jl @@ -34,6 +34,8 @@ include("moi_oracle.jl") include("function_gradient.jl") include("active_set.jl") include("active_set_quadratic.jl") +include("active_set_quadratic_direct_solve.jl") +include("active_set_sparsifier.jl") include("blended_cg.jl") include("afw.jl") diff --git a/src/active_set.jl b/src/active_set.jl index 62393b47f..c02145666 100644 --- a/src/active_set.jl +++ b/src/active_set.jl @@ -326,3 +326,7 @@ function compute_active_set_iterate!(active_set::AbstractActiveSet{<:ScaledHotVe end return active_set.x end + +function update_weights!(as::AbstractActiveSet, new_weights) + as.weights .= new_weights +end diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index 8f26b632e..a616dd1fb 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -49,7 +49,8 @@ function detect_quadratic_function(grad!, x0; test=true) end function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function) where {AT,R} - return ActiveSetQuadratic(tuple_values, detect_quadratic_function(grad!, tuple_values[1][2])...) + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadratic(tuple_values, A, b) end function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b) where {AT,R,H} @@ -71,13 +72,15 @@ function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b) dots_b[idx] = fast_dot(b, atoms[idx]) end x = similar(b) - as = ActiveSetQuadratic{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified) + as = ActiveSetQuadratic(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified) + compute_active_set_iterate!(as) return as end -function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!) where {AT,R} - return ActiveSetQuadratic{AT,R}(tuple_values, detect_quadratic_function(grad!, tuple_values[1][2])...) 
+function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadratic{AT,R}(tuple_values, A, b) end function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b) where {AT,R,H} @@ -116,6 +119,7 @@ function Base.:*(a::Identity, b) return a.λ * b end end + function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b) where {AT,R} return ActiveSetQuadratic(tuple_values, Identity(A.λ), b) end @@ -147,6 +151,7 @@ function Base.push!(as::ActiveSetQuadratic{AT,R}, (λ, a)) where {AT,R} return as end +# TODO multi-indices version function Base.deleteat!(as::ActiveSetQuadratic, idx::Int) @inbounds for i in 1:idx-1 as.dots_x[i] -= as.weights_prev[idx] * as.dots_A[idx][i] @@ -311,3 +316,8 @@ function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R,IT,H}, b) return warm_as end +function update_weights!(as::ActiveSetQuadratic, new_weights) + as.weights_prev .= as.weights + as.weights .= new_weights + as.modified .= true +end diff --git a/src/active_set_quadratic_direct_solve.jl b/src/active_set_quadratic_direct_solve.jl new file mode 100644 index 000000000..fef53a077 --- /dev/null +++ b/src/active_set_quadratic_direct_solve.jl @@ -0,0 +1,383 @@ + +""" + ActiveSetQuadraticLinearSolve{AT, R, IT} + +Represents an active set of extreme vertices collected in a FW algorithm, +along with their coefficients `(λ_i, a_i)`. +`R` is the type of the `λ_i`, `AT` is the type of the atoms `a_i`. +The iterate `x = ∑λ_i a_i` is stored in x with type `IT`. +The objective function is assumed to be of the form `f(x)=½⟨x,Ax⟩+⟨b,x⟩+c` +so that the gradient is `∇f(x)=Ax+b`. + +This active set stores an inner `active_set` that keeps track of the current set of vertices and convex decomposition. +It therefore delegates all update, deletion, and addition operations to this inner active set. +The `weights`, `atoms`, and `x` fields should only be accessed to read and are effectively the same objects as those in the inner active set. + +The structure also contains a scheduler struct which is called with the `should_solve_lp` function. +To define a new frequency at which the LP should be solved, one can define another scheduler struct and implement the corresponding method. +""" +struct ActiveSetQuadraticLinearSolve{ + AT, + R<:Real, + IT, + H, + BT, + OT<:MOI.AbstractOptimizer, + AS<:AbstractActiveSet, + SF, +} <: AbstractActiveSet{AT,R,IT} + weights::Vector{R} + atoms::Vector{AT} + x::IT + A::H # Hessian matrix + b::BT # linear term + active_set::AS + lp_optimizer::OT + scheduler::SF + counter::Base.RefValue{Int} +end + +""" + ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, grad!::Function, lp_optimizer) + +Creates an ActiveSetQuadraticLinearSolve by computing the Hessian and linear term from `grad!`. +""" +function ActiveSetQuadraticLinearSolve( + tuple_values::Vector{Tuple{R,AT}}, + grad!::Function, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadraticLinearSolve(tuple_values, A, b, lp_optimizer, scheduler=scheduler) +end + +""" + ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, A, b, lp_optimizer) + +Creates an `ActiveSetQuadraticLinearSolve` from the given Hessian `A`, linear term `b` and `lp_optimizer` by creating an inner `ActiveSetQuadratic` active set. 
+""" +function ActiveSetQuadraticLinearSolve( + tuple_values::Vector{Tuple{R,AT}}, + A::H, + b, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R,H} + inner_as = ActiveSetQuadratic(tuple_values, A, b) + return ActiveSetQuadraticLinearSolve( + inner_as.weights, + inner_as.atoms, + inner_as.x, + inner_as.A, + inner_as.b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) +end + +function ActiveSetQuadraticLinearSolve( + inner_as::AbstractActiveSet, + A, + b, + lp_optimizer; + scheduler=LogScheduler(), +) + as = ActiveSetQuadraticLinearSolve( + inner_as.weights, + inner_as.atoms, + inner_as.x, + A, + b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) + compute_active_set_iterate!(as) + return as +end + +function ActiveSetQuadraticLinearSolve( + inner_as::AbstractActiveSet, + A::LinearAlgebra.UniformScaling, + b, + lp_optimizer; + scheduler=LogScheduler(), +) + as = ActiveSetQuadraticLinearSolve( + inner_as.weights, + inner_as.atoms, + inner_as.x, + A, + b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) + compute_active_set_iterate!(as) + return as +end + +function ActiveSetQuadraticLinearSolve( + inner_as::AbstractActiveSet, + grad!::Function, + lp_optimizer; + scheduler=LogScheduler(), +) + A, b = detect_quadratic_function(grad!, inner_as.atoms[1]) + return ActiveSetQuadraticLinearSolve(inner_as, A, b, lp_optimizer; scheduler=scheduler) +end + +function ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values::Vector{<:Tuple{<:Number,<:Any}}, + grad!::Function, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values, + A, + b, + lp_optimizer; + scheduler=scheduler, + ) +end + +function ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values::Vector{<:Tuple{<:Number,<:Any}}, + A::H, + b, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R,H} + inner_as = ActiveSetQuadratic{AT,R}(tuple_values, A, b) + as = ActiveSetQuadraticLinearSolve{AT,R,typeof(x),H}( + inner_as.weights, + inner_as.atoms, + inner_as.x, + A, + b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) + compute_active_set_iterate!(as) + return as +end + +function ActiveSetQuadraticLinearSolve( + tuple_values::Vector{Tuple{R,AT}}, + A::UniformScaling, + b, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R} + return ActiveSetQuadraticLinearSolve( + tuple_values, + Identity(A.λ), + b, + lp_optimizer; + scheduler=scheduler, + ) +end +function ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values::Vector{<:Tuple{<:Number,<:Any}}, + A::UniformScaling, + b, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R} + return ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values, + Identity(A.λ), + b, + lp_optimizer; + scheduler=scheduler, + ) +end + +# all mutating functions are delegated to the inner active set + +Base.push!(as::ActiveSetQuadraticLinearSolve, tuple) = push!(as.active_set, tuple) + +Base.deleteat!(as::ActiveSetQuadraticLinearSolve, idx) = deleteat!(as.active_set, idx) + +Base.empty!(as::ActiveSetQuadraticLinearSolve) = empty!(as.active_set) + +function active_set_update!( + as::ActiveSetQuadraticLinearSolve{AT,R}, + lambda, + atom, + renorm=true, + idx=nothing; + weight_purge_threshold=weight_purge_threshold_default(R), + add_dropped_vertices=false, + vertex_storage=nothing, +) where {AT,R} + if idx === nothing + idx = find_atom(as, atom) + end + active_set_update!( + as.active_set, + lambda, + atom, + renorm, + idx; + 
+        weight_purge_threshold=weight_purge_threshold,
+        add_dropped_vertices=add_dropped_vertices,
+        vertex_storage=vertex_storage,
+    )
+    # new atom introduced, we can solve the auxiliary LP
+    if idx < 0
+        as.counter[] += 1
+        if should_solve_lp(as, as.scheduler)
+            solve_quadratic_activeset_lp!(as)
+        end
+    end
+    return as
+end
+
+active_set_renormalize!(as::ActiveSetQuadraticLinearSolve) = active_set_renormalize!(as.active_set)
+
+function active_set_argmin(as::ActiveSetQuadraticLinearSolve, direction)
+    return active_set_argmin(as.active_set, direction)
+end
+
+function active_set_argminmax(as::ActiveSetQuadraticLinearSolve, direction; Φ=0.5)
+    return active_set_argminmax(as.active_set, direction; Φ=Φ)
+end
+
+# generic quadratic with quadratic information provided
+
+"""
+    solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, IT, H})
+
+Solves the auxiliary LP over the current active set.
+The method is specialized by type `H` of the Hessian matrix `A`.
+"""
+function solve_quadratic_activeset_lp!(
+    as::ActiveSetQuadraticLinearSolve{AT,R,IT,<:AbstractMatrix},
+) where {AT,R,IT}
+    nv = length(as)
+    o = as.lp_optimizer
+    MOI.empty!(o)
+    λ = MOI.add_variables(o, nv)
+    # λ ≥ 0, ∑ λ == 1
+    MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0))
+    sum_of_variables = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0)
+    MOI.add_constraint(o, sum_of_variables, MOI.EqualTo(1.0))
+    # Vᵗ A V λ == -Vᵗ b
+    for atom in as.atoms
+        lhs = MOI.ScalarAffineFunction{Float64}([], 0.0)
+        Base.sizehint!(lhs.terms, nv)
+        # replaces direct sum because of MOI and MutableArithmetic slow sums
+        for j in 1:nv
+            push!(lhs.terms, MOI.ScalarAffineTerm(fast_dot(atom, as.A, as.atoms[j]), λ[j]))
+        end
+        rhs = -dot(atom, as.b)
+        MOI.add_constraint(o, lhs, MOI.EqualTo(rhs))
+    end
+    MOI.set(o, MOI.ObjectiveFunction{typeof(sum_of_variables)}(), sum_of_variables)
+    MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE)
+    MOI.optimize!(o)
+    if MOI.get(o, MOI.TerminationStatus()) ∉ (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL)
+        return as
+    end
+    indices_to_remove = Int[]
+    new_weights = R[]
+    for idx in eachindex(λ)
+        weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx])
+        if weight_value <= 1e-5
+            push!(indices_to_remove, idx)
+        else
+            push!(new_weights, weight_value)
+        end
+    end
+    deleteat!(as.active_set, indices_to_remove)
+    @assert length(as) == length(new_weights)
+    update_weights!(as.active_set, new_weights)
+    active_set_cleanup!(as)
+    active_set_renormalize!(as)
+    compute_active_set_iterate!(as)
+    return as
+end
+
+# special case of scaled identity Hessian
+
+function solve_quadratic_activeset_lp!(
+    as::ActiveSetQuadraticLinearSolve{AT,R,IT,<:Union{Identity,LinearAlgebra.UniformScaling}},
+) where {AT,R,IT}
+    hessian_scaling = as.A.λ
+    nv = length(as)
+    o = as.lp_optimizer
+    MOI.empty!(o)
+    λ = MOI.add_variables(o, nv)
+    # λ ≥ 0, ∑ λ == 1
+    MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0))
+    sum_of_variables = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0)
+    MOI.add_constraint(o, sum_of_variables, MOI.EqualTo(1.0))
+    # a Aᵗ A λ == -Aᵗ b
+    for atom in as.atoms
+        lhs = MOI.ScalarAffineFunction{Float64}([], 0.0)
+        Base.sizehint!(lhs.terms, nv)
+        # replaces direct sum because of MOI and MutableArithmetic slow sums
+        for j in 1:nv
+            push!(lhs.terms, MOI.ScalarAffineTerm(hessian_scaling * dot(atom, as.atoms[j]), λ[j]))
+        end
+        rhs = -dot(atom, as.b)
+        MOI.add_constraint(o, lhs, MOI.EqualTo(rhs))
+    end
+    MOI.set(o, MOI.ObjectiveFunction{typeof(sum_of_variables)}(), sum_of_variables)
+    MOI.set(o,
MOI.ObjectiveSense(), MOI.MIN_SENSE) + MOI.optimize!(o) + if MOI.get(o, MOI.TerminationStatus()) ∉ (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) + return as + end + indices_to_remove = Int[] + new_weights = R[] + for idx in eachindex(λ) + weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) + if weight_value <= 2 * weight_purge_threshold_default(typeof(weight_value)) + push!(indices_to_remove, idx) + else + push!(new_weights, weight_value) + end + end + deleteat!(as.active_set, indices_to_remove) + @assert length(as) == length(new_weights) + update_weights!(as.active_set, new_weights) + active_set_cleanup!(as) + @assert all(>=(0), new_weights) + active_set_renormalize!(as) + compute_active_set_iterate!(as) + return as +end + +struct LogScheduler{T} + start_time::Int + scaling_factor::T + max_interval::Int + current_interval::Base.RefValue{Int} + last_solve_counter::Base.RefValue{Int} +end + +LogScheduler(; start_time=20, scaling_factor=1.5, max_interval=1000) = + LogScheduler(start_time, scaling_factor, max_interval, Ref(start_time), Ref(0)) + +function should_solve_lp(as::ActiveSetQuadraticLinearSolve, scheduler::LogScheduler) + if as.counter[] - scheduler.last_solve_counter[] >= scheduler.current_interval[] + scheduler.last_solve_counter[] = as.counter[] + scheduler.current_interval[] = min( + round(Int, scheduler.scaling_factor * scheduler.current_interval[]), + scheduler.max_interval, + ) + return true + end + return false +end diff --git a/src/active_set_sparsifier.jl b/src/active_set_sparsifier.jl new file mode 100644 index 000000000..6bb523217 --- /dev/null +++ b/src/active_set_sparsifier.jl @@ -0,0 +1,96 @@ +struct ActiveSetSparsifier{AT,R,IT,AS<:AbstractActiveSet{AT,R,IT},OT<:MOI.AbstractOptimizer} <: + AbstractActiveSet{AT,R,IT} + active_set::AS + weights::Vector{R} + atoms::Vector{AT} + x::IT + optimizer::OT + minimum_vertices::Int + solve_frequency::Int + counter::Base.RefValue{Int} +end + +function ActiveSetSparsifier( + active_set::AbstractActiveSet, + optimizer::MOI.AbstractOptimizer; + minimum_vertices=50, + solve_frequency=50, +) + return ActiveSetSparsifier( + active_set, + active_set.weights, + active_set.atoms, + active_set.x, + optimizer, + minimum_vertices, + solve_frequency, + Ref(0), + ) +end + +function Base.push!(as::ActiveSetSparsifier, (λ, a)) + return push!(as.active_set, (λ, a)) +end + +Base.deleteat!(as::ActiveSetSparsifier, idx::Int) = deleteat!(as.active_set, idx) + +Base.empty!(as::ActiveSetSparsifier) = empty!(as.active_set) + +function active_set_update!( + as::ActiveSetSparsifier{AS,OT,AT,R,IT}, + lambda, + atom, + renorm=true, + idx=nothing; + kwargs..., +) where {AS,OT,AT,R,IT} + active_set_update!(as.active_set, lambda, atom, renorm, idx; kwargs...) 
+ n = length(as) + as.counter[] += 1 + if n > as.minimum_vertices && mod(as.counter[], as.solve_frequency) == 0 + # sparsifying active set + MOI.empty!(as.optimizer) + x0 = as.active_set.x + λ = MOI.add_variables(as.optimizer, n) + # λ ∈ Δ_n ⇔ λ ≥ 0, ∑ λ == 1 + MOI.add_constraint.(as.optimizer, λ, MOI.GreaterThan(0.0)) + MOI.add_constraint(as.optimizer, sum(λ; init=0.0), MOI.EqualTo(1.0)) + x_sum = 0 * as.active_set.atoms[1] + for (idx, atom) in enumerate(as.active_set.atoms) + x_sum += λ[idx] * atom + end + for idx in eachindex(x_sum) + MOI.add_constraint(as.optimizer, x_sum[idx], MOI.EqualTo(x0[idx])) + end + # Set a dummy objective (minimize ∑λ) + dummy_objective = sum(λ; init=0.0) + MOI.set(as.optimizer, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective) + MOI.set(as.optimizer, MOI.ObjectiveSense(), MOI.MIN_SENSE) + MOI.optimize!(as.optimizer) + if MOI.get(as.optimizer, MOI.TerminationStatus()) in + (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) + indices_to_remove = Int[] + new_weights = R[] + for idx in eachindex(λ) + weight_value = MOI.get(as.optimizer, MOI.VariablePrimal(), λ[idx]) + if weight_value <= sqrt(weight_purge_threshold_default(typeof(weight_value))) + push!(indices_to_remove, idx) + else + push!(new_weights, weight_value) + end + end + deleteat!(as.active_set.atoms, indices_to_remove) + deleteat!(as.active_set.weights, indices_to_remove) + @assert length(as) == length(new_weights) + as.active_set.weights .= new_weights + active_set_renormalize!(as) + end + end + return as +end + +active_set_renormalize!(as::ActiveSetSparsifier) = active_set_renormalize!(as.active_set) + +active_set_argmin(as::ActiveSetSparsifier, direction) = active_set_argmin(as.active_set, direction) +active_set_argminmax(as::ActiveSetSparsifier, direction; Φ=0.5) = + active_set_argminmax(as.active_set, direction; Φ=Φ) diff --git a/src/blended_pairwise.jl b/src/blended_pairwise.jl index e2efec4d2..a42862063 100644 --- a/src/blended_pairwise.jl +++ b/src/blended_pairwise.jl @@ -1,4 +1,3 @@ - """ blended_pairwise_conditional_gradient(f, grad!, lmo, x0; kwargs...) @@ -325,7 +324,6 @@ function blended_pairwise_conditional_gradient( linesearch_workspace, memory_mode, ) - if callback !== nothing state = CallbackState( t, diff --git a/src/moi_oracle.jl b/src/moi_oracle.jl index 875b8001d..1471bd6e7 100644 --- a/src/moi_oracle.jl +++ b/src/moi_oracle.jl @@ -1,6 +1,6 @@ """ - MathOptLMO{OT <: MOI.Optimizer} <: LinearMinimizationOracle + MathOptLMO{OT <: MOI.AbstractOptimizer} <: LinearMinimizationOracle Linear minimization oracle with feasible space defined through a MathOptInterface.Optimizer. The oracle call sets the direction and reruns the optimizer. 
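
The `LogScheduler` defined in `active_set_quadratic_direct_solve.jl` above spaces out the LP solves geometrically: the first solve fires after `start_time` new-atom events, and each subsequent interval is multiplied by `scaling_factor` until it saturates at `max_interval`. The following standalone Julia sketch (an illustration only, not part of the patch) traces the counter values at which `should_solve_lp` would fire under the default settings:

# Sketch of the LogScheduler cadence, assuming the defaults shown above:
# start_time=20, scaling_factor=1.5, max_interval=1000.
interval = 20
last_solve = 0
solves_at = Int[]
for counter in 1:10_000   # counter increments once per new atom added
    if counter - last_solve >= interval
        push!(solves_at, counter)  # an LP solve would be triggered here
        last_solve = counter
        interval = min(round(Int, 1.5 * interval), 1000)
    end
end
first(solves_at, 5)  # [20, 50, 95, 163, 265]: solves become progressively rarer
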
diff --git a/src/utils.jl b/src/utils.jl
index 629f55e15..c6e01fd6f 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -163,6 +163,77 @@ function fast_dot(A::Matrix{T1}, B::SparseArrays.SparseMatrixCSC{T2}) where {T1,
     return s
 end
 
+fast_dot(a, Q, b) = dot(a, Q, b)
+
+function fast_dot(a::SparseArrays.AbstractSparseVector{<:Real}, Q::Diagonal{<:Real}, b::AbstractVector{<:Real})
+    if a === b
+        return _fast_quadratic_form_symmetric(a, Q)
+    end
+    d = Q.diag
+    nzvals = SparseArrays.nonzeros(a)
+    nzinds = SparseArrays.nonzeroinds(a)
+    return sum(eachindex(nzvals); init=zero(eltype(a))) do nzidx
+        nzvals[nzidx] * d[nzinds[nzidx]] * b[nzinds[nzidx]]
+    end
+end
+
+function fast_dot(a::SparseArrays.AbstractSparseVector{<:Real}, Q::Diagonal{<:Real}, b::SparseArrays.AbstractSparseVector{<:Real})
+    if a === b
+        return _fast_quadratic_form_symmetric(a, Q)
+    end
+    n = length(a)
+    if length(b) != n
+        throw(
+            DimensionMismatch("Vector a has length $n but b has length $(length(b))")
+        )
+    end
+    anzind = SparseArrays.nonzeroinds(a)
+    bnzind = SparseArrays.nonzeroinds(b)
+    anzval = SparseArrays.nonzeros(a)
+    bnzval = SparseArrays.nonzeros(b)
+    s = zero(Base.promote_eltype(a, Q, b))
+
+    if isempty(anzind) || isempty(bnzind)
+        return s
+    end
+
+    a_idx = 1
+    b_idx = 1
+    a_idx_last = length(anzind)
+    b_idx_last = length(bnzind)
+
+    # go through the nonzero indices of a and b simultaneously
+    @inbounds while a_idx <= a_idx_last && b_idx <= b_idx_last
+        ia = anzind[a_idx]
+        ib = bnzind[b_idx]
+        if ia == ib
+            s += dot(anzval[a_idx], Q.diag[ia], bnzval[b_idx])
+            a_idx += 1
+            b_idx += 1
+        elseif ia < ib
+            a_idx += 1
+        else
+            b_idx += 1
+        end
+    end
+    return s
+end
+
+
+function _fast_quadratic_form_symmetric(a, Q)
+    d = Q.diag
+    if length(d) != length(a)
+        throw(DimensionMismatch())
+    end
+    nzvals = SparseArrays.nonzeros(a)
+    nzinds = SparseArrays.nonzeroinds(a)
+    s = zero(Base.promote_eltype(a, Q))
+    @inbounds for nzidx in eachindex(nzvals)
+        s += nzvals[nzidx]^2 * d[nzinds[nzidx]]
+    end
+    return s
+end
+
 """
     trajectory_callback(storage)
 
diff --git a/test/quadratic_lp_active_set.jl b/test/quadratic_lp_active_set.jl
new file mode 100644
index 000000000..a87b37cea
--- /dev/null
+++ b/test/quadratic_lp_active_set.jl
@@ -0,0 +1,113 @@
+using FrankWolfe
+using LinearAlgebra
+using Random
+using Test
+import HiGHS
+import MathOptInterface as MOI
+
+n = Int(1e4)
+k = 5000
+
+s = 10
+@info "Seed $s"
+Random.seed!(s)
+
+xpi = rand(n);
+total = sum(xpi);
+
+const xp = xpi ./ total;
+
+f(x) = norm(x - xp)^2
+function grad!(storage, x)
+    @. storage = 2 * (x - xp)
+end
+
+lmo = FrankWolfe.KSparseLMO(5, 1.0)
+
+const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n))
+
+function build_callback(trajectory_arr)
+    return function callback(state, active_set, args...)
+        return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set)))
+    end
+end
+
+trajectoryBPCG_standard = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    copy(x00),
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=false,
+    callback=build_callback(trajectoryBPCG_standard),
+);
+
+as_quad_direct = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    [(1.0, copy(x00))],
+    2 * LinearAlgebra.I,
+    -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
+
+trajectoryBPCG_quadratic_direct_specialized = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad_direct,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=false,
+    callback=build_callback(trajectoryBPCG_quadratic_direct_specialized),
+);
+
+as_quad_direct_generic = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    [(1.0, copy(x00))],
+    2 * Diagonal(ones(length(xp))),
+    -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
+
+trajectoryBPCG_quadratic_direct_generic = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad_direct_generic,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=true,
+    callback=build_callback(trajectoryBPCG_quadratic_direct_generic),
+);
+
+as_quad_direct_basic_as = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    FrankWolfe.ActiveSet([1.0], [copy(x00)], collect(x00)),
+    2 * LinearAlgebra.I,
+    -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
+
+trajectoryBPCG_quadratic_noqas = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad_direct_basic_as,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=true,
+    callback=build_callback(trajectoryBPCG_quadratic_noqas),
+);
+
+
+dual_gaps_quadratic_specialized = getindex.(trajectoryBPCG_quadratic_direct_specialized, 4)
+dual_gaps_quadratic_generic = getindex.(trajectoryBPCG_quadratic_direct_generic, 4)
+dual_gaps_quadratic_noqas = getindex.(trajectoryBPCG_quadratic_noqas, 4)
+dual_gaps_bpcg = getindex.(trajectoryBPCG_standard, 4)
+
+
+@test dual_gaps_quadratic_specialized[end] < dual_gaps_bpcg[end]
+@test dual_gaps_quadratic_noqas[end] < dual_gaps_bpcg[end]
+@test norm(dual_gaps_quadratic_specialized - dual_gaps_quadratic_noqas) ≤ k * 1e-5
diff --git a/test/runtests.jl b/test/runtests.jl
index d984043dc..40f5ac366 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -798,6 +798,20 @@ using Test
     end
 end
 
+module LpDirectSolveTest
+using Test
+@testset "LP solving for quadratic functions and active set" begin
+    include("quadratic_lp_active_set.jl")
+end
+end
+
+module SparsifyingActiveSetTest
+using Test
+@testset "Sparsifying active set" begin
+    include("sparsifying_activeset.jl")
+end
+end
+
 include("generic-arrays.jl")
 
 include("blocks.jl")
 
diff --git a/test/sparsifying_activeset.jl b/test/sparsifying_activeset.jl
new file mode 100644
index 000000000..1fde16d53
--- /dev/null
+++ b/test/sparsifying_activeset.jl
@@ -0,0 +1,85 @@
+#=
+This example demonstrates the use of the Blended Pairwise Conditional Gradient algorithm
+with direct solve steps for a quadratic optimization problem over a sparse polytope.
+
+The example showcases how the algorithm balances between:
+- Pairwise steps for efficient optimization
+- Periodic direct solves for handling the quadratic objective
+- Lazy (approximate) linear minimization steps for improved iteration complexity
+
+It also demonstrates how to set up custom callbacks for tracking algorithm progress.
+=#
+
+using LinearAlgebra
+using Test
+using Random
+using FrankWolfe
+import HiGHS
+
+n = Int(1e4)
+k = 10000
+
+# s = rand(1:100)
+s = 10
+@info "Seed $s"
+Random.seed!(s)
+
+xpi = rand(n);
+total = sum(xpi);
+
+# here the optimal solution lies in the interior; if you want an optimal solution on a face and not in the interior, use:
+# const xp = xpi;
+
+const xp = xpi ./ total;
+
+f(x) = norm(x - xp)^2
+function grad!(storage, x)
+    @. storage = 2 * (x - xp)
+end
+
+lmo = FrankWolfe.LpNormLMO{Float64,5}(1.0)
+
+const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n))
+
+function build_callback(trajectory_arr)
+    return function callback(state, active_set, args...)
+        return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set)))
+    end
+end
+
+trajectoryBPCG_standard = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    copy(x00),
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=false,
+    trajectory=false,
+    callback=build_callback(trajectoryBPCG_standard),
+);
+
+active_set_sparse = FrankWolfe.ActiveSetSparsifier(
+    FrankWolfe.ActiveSet([1.0], [x00], similar(x00)),
+    HiGHS.Optimizer(),
+)
+trajectoryBPCG_as_sparse = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    active_set_sparse,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=false,
+    callback=build_callback(trajectoryBPCG_as_sparse),
+);
+
+as_cardinality_bpcg = getindex.(trajectoryBPCG_standard, 6)
+as_cardinality_sparse = getindex.(trajectoryBPCG_as_sparse, 6)
+@test maximum(as_cardinality_sparse - as_cardinality_bpcg) <= 0
+
+dual_gap_bpcg = getindex.(trajectoryBPCG_standard, 4)
+dual_gap_sparse = getindex.(trajectoryBPCG_as_sparse, 4)
+@test maximum(dual_gap_sparse - dual_gap_bpcg) <= 1e-7

From 6bcb4b707a9c80b389ac924698660e0d383b34be Mon Sep 17 00:00:00 2001
From: dhendryc <92737336+dhendryc@users.noreply.github.com>
Date: Mon, 28 Oct 2024 10:58:42 +0100
Subject: [PATCH 07/21] Actually use Backtracking in Secant (#517)

* Show lowest dual gap in last iteration.

* Add backtracking as a fall back linesearch in Secant.

* Let the user decide the fallback line search.

* Adjust syntax.

* Type stability.

* Actually use backtracking.
--------- Co-authored-by: Hendrych --- src/linesearch.jl | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/linesearch.jl b/src/linesearch.jl index 5ae2d3fe1..f974b9c38 100644 --- a/src/linesearch.jl +++ b/src/linesearch.jl @@ -425,7 +425,7 @@ function perform_line_search( while abs(dot_gdir) > line_search.tol if i > line_search.limit_num_steps workspace.last_gamma = best_gamma # Update last_gamma before returning - return best_gamma + break end grad!(grad_storage, storage) @@ -433,7 +433,7 @@ function perform_line_search( if dot_gdir_new ≈ dot_gdir workspace.last_gamma = best_gamma # Update last_gamma before returning - return best_gamma + break end gamma_new = gamma - dot_gdir_new * (gamma - gamma_prev) / (dot_gdir_new - dot_gdir) @@ -452,6 +452,12 @@ function perform_line_search( i += 1 end if abs(dot_gdir) > line_search.tol + # Choose gamma_max to be domain feasible + storage = muladd_memory_mode(memory_mode, storage, x, gamma_max, d) + while !line_search.domain_oracle(storage) + gamma_max /= 2 + storage = muladd_memory_mode(memory_mode, storage, x, gamma_max, d) + end gamma = perform_line_search( line_search.inner_ls, 0, @@ -468,8 +474,9 @@ function perform_line_search( storage = muladd_memory_mode(memory_mode, storage, x, gamma, d) new_val = f(storage) - @assert new_val <= best_val - best_gamma = gamma + if new_val <= best_val + best_gamma = gamma + end end workspace.last_gamma = best_gamma # Update last_gamma before returning return best_gamma From ccf1367a8325848a785ed2a2157188ec39b97fb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Mon, 28 Oct 2024 11:46:13 +0100 Subject: [PATCH 08/21] fix sparsify types (#518) * fix sparsify types * remove log --- examples/blended_pairwise_with_sparsify.jl | 14 ++++---------- src/active_set_sparsifier.jl | 4 ++-- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/examples/blended_pairwise_with_sparsify.jl b/examples/blended_pairwise_with_sparsify.jl index 6d42b7de4..7700268bb 100644 --- a/examples/blended_pairwise_with_sparsify.jl +++ b/examples/blended_pairwise_with_sparsify.jl @@ -64,8 +64,6 @@ trajectoryBPCG_standard = [] copy(x00), max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), - print_iter=k / 10, - memory_mode=FrankWolfe.InplaceEmphasis(), verbose=true, trajectory=true, callback=build_callback(trajectoryBPCG_standard), @@ -76,25 +74,21 @@ active_set_sparse = FrankWolfe.ActiveSetSparsifier( FrankWolfe.ActiveSet([1.0], [x00], similar(x00)), HiGHS.Optimizer(), ) - @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( f, grad!, lmo, - copy(x00), + active_set_sparse, max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), - print_iter=k / 10, - memory_mode=FrankWolfe.InplaceEmphasis(), verbose=true, - trajectory=true, callback=build_callback(trajectoryBPCG_as_sparse), ); # Reduction primal/dual error vs. 
sparsity of solution -dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic] -labelSparsity = ["BPCG (Standard)", "BPCG (Sparsify)"] +plot_data = [trajectoryBPCG_standard, trajectoryBPCG_as_sparse] +plot_labels = ["BPCG (Standard)", "BPCG (Sparsify)"] # Plot sparsity -plot_sparsity(dataSparsity, labelSparsity, legend_position=:topright) +plot_sparsity(plot_data, plot_labels, legend_position=:topright) diff --git a/src/active_set_sparsifier.jl b/src/active_set_sparsifier.jl index 6bb523217..6a408a0ac 100644 --- a/src/active_set_sparsifier.jl +++ b/src/active_set_sparsifier.jl @@ -37,13 +37,13 @@ Base.deleteat!(as::ActiveSetSparsifier, idx::Int) = deleteat!(as.active_set, idx Base.empty!(as::ActiveSetSparsifier) = empty!(as.active_set) function active_set_update!( - as::ActiveSetSparsifier{AS,OT,AT,R,IT}, + as::ActiveSetSparsifier{AT,R,IT,AS}, lambda, atom, renorm=true, idx=nothing; kwargs..., -) where {AS,OT,AT,R,IT} +) where {AS,AT,R,IT} active_set_update!(as.active_set, lambda, atom, renorm, idx; kwargs...) n = length(as) as.counter[] += 1 From 87d8ec1a1249721a28095e085bb2d9baf101815e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Designolle?= <55443987+sebastiendesignolle@users.noreply.github.com> Date: Mon, 28 Oct 2024 12:39:27 +0100 Subject: [PATCH 09/21] Revert inverse syntax (#520) --- src/active_set_quadratic.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index a616dd1fb..5739198f2 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -36,7 +36,7 @@ function detect_quadratic_function(grad!, x0; test=true) X[:, i] .-= x0 G[:, i] .= storage .- g0 end - A = G \ X + A = G * inv(X) b = g0 - A * x0 if test x_test = randn(T, n) From 8e094714d1a5309da506f7c2fafa11a723732366 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Mon, 28 Oct 2024 13:34:39 +0100 Subject: [PATCH 10/21] Update Project.toml (#514) * Update Project.toml * update docs --- Project.toml | 2 +- docs/src/reference/3_backend.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index ca298fa1a..0949ccd84 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "FrankWolfe" uuid = "f55ce6ea-fdc5-4628-88c5-0087fe54bd30" authors = ["ZIB-IOL"] -version = "0.4.1" +version = "0.4.2" [deps] Arpack = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97" diff --git a/docs/src/reference/3_backend.md b/docs/src/reference/3_backend.md index 1856373e0..34e71f040 100644 --- a/docs/src/reference/3_backend.md +++ b/docs/src/reference/3_backend.md @@ -4,7 +4,7 @@ ```@autodocs Modules = [FrankWolfe] -Pages = ["active_set.jl"] +Pages = ["active_set.jl", "active_set_quadratic.jl", "active_set_quadratic_direct_solve.jl", "active_set_sparsifier.jl"] ``` ## Functions and gradients From 57520a75a3ca2262428c28c69cfc53f0bf15e2e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Designolle?= Date: Tue, 29 Oct 2024 15:20:11 +0100 Subject: [PATCH 11/21] Factorise constructor syntax (typed vs not typed) --- src/active_set.jl | 14 ++------------ src/active_set_quadratic.jl | 23 +---------------------- 2 files changed, 3 insertions(+), 34 deletions(-) diff --git a/src/active_set.jl b/src/active_set.jl index c02145666..f02177933 100644 --- a/src/active_set.jl +++ b/src/active_set.jl @@ -27,17 +27,7 @@ ActiveSet{AT,R}() where {AT,R} = ActiveSet{AT,R,Vector{float(eltype(AT))}}([], [ ActiveSet{AT}() where {AT} = ActiveSet{AT,Float64,Vector{float(eltype(AT))}}() 
function ActiveSet(tuple_values::AbstractVector{Tuple{R,AT}}) where {AT,R} - n = length(tuple_values) - weights = Vector{R}(undef, n) - atoms = Vector{AT}(undef, n) - @inbounds for idx in 1:n - weights[idx] = tuple_values[idx][1] - atoms[idx] = tuple_values[idx][2] - end - x = similar(atoms[1], float(eltype(atoms[1]))) - as = ActiveSet{AT,R,typeof(x)}(weights, atoms, x) - compute_active_set_iterate!(as) - return as + return ActiveSet{AT,R}(tuple_values) end function ActiveSet{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}) where {AT,R} @@ -48,7 +38,7 @@ function ActiveSet{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}) weights[idx] = tuple_values[idx][1] atoms[idx] = tuple_values[idx][2] end - x = similar(tuple_values[1][2], float(eltype(tuple_values[1][2]))) + x = similar(atoms[1], float(eltype(atoms[1]))) as = ActiveSet{AT,R,typeof(x)}(weights, atoms, x) compute_active_set_iterate!(as) return as diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index 5739198f2..adea1c10b 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -54,28 +54,7 @@ function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Fu end function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b) where {AT,R,H} - n = length(tuple_values) - weights = Vector{R}(undef, n) - atoms = Vector{AT}(undef, n) - dots_x = zeros(R, n) - dots_A = Vector{Vector{R}}(undef, n) - dots_b = Vector{R}(undef, n) - weights_prev = zeros(R, n) - modified = trues(n) - @inbounds for idx in 1:n - weights[idx] = tuple_values[idx][1] - atoms[idx] = tuple_values[idx][2] - dots_A[idx] = Vector{R}(undef, idx) - for idy in 1:idx - dots_A[idx][idy] = fast_dot(A * atoms[idx], atoms[idy]) - end - dots_b[idx] = fast_dot(b, atoms[idx]) - end - x = similar(b) - as = ActiveSetQuadratic(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified) - - compute_active_set_iterate!(as) - return as + return ActiveSetQuadratic{AT,R}(tuple_values, A, b) end function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function) where {AT,R} From d71ec0b6d67291bf4ffca319c473fef4313410ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Designolle?= Date: Tue, 29 Oct 2024 15:32:50 +0100 Subject: [PATCH 12/21] Isolate reset_quadratic_dots! 
outside the constructor --- src/active_set_quadratic.jl | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index adea1c10b..08f8e4e96 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -74,18 +74,27 @@ function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number, @inbounds for idx in 1:n weights[idx] = tuple_values[idx][1] atoms[idx] = tuple_values[idx][2] - dots_A[idx] = Vector{R}(undef, idx) - for idy in 1:idx - dots_A[idx][idy] = fast_dot(A * atoms[idx], atoms[idy]) - end - dots_b[idx] = fast_dot(b, atoms[idx]) end x = similar(b) as = ActiveSetQuadratic{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified) + reset_quadratic_dots!(as) compute_active_set_iterate!(as) return as end +# should only be called upon construction +# for active sets with a large number of atoms, this function becomes very costly +function reset_quadratic_dots!(as::ActiveSetQuadratic{AT,R}) where {AT,R} + @inbounds for idx in 1:length(as) + as.dots_A[idx] = Vector{R}(undef, idx) + for idy in 1:idx + as.dots_A[idx][idy] = fast_dot(as.A * as.atoms[idx], as.atoms[idy]) + end + as.dots_b[idx] = fast_dot(as.b, as.atoms[idx]) + end + return as +end + # custom dummy structure to handle identity hessian matrix # required as LinearAlgebra.I does not work for general tensors struct Identity{R <: Real} From f9cdd9f248af465a83ac7f877318534f2880acf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Tue, 29 Oct 2024 17:29:32 +0100 Subject: [PATCH 13/21] tolerance for non-diagonal and test (#521) * tolerance for non-diagonal and test * separate namespaces * factorize active set direct solve by Hessian * fix type not necessarily matrix * fix type --- src/active_set_quadratic_direct_solve.jl | 64 +++---------- test/as_quadratic_projection.jl | 116 +++++++++++++++++++++++ test/runtests.jl | 7 ++ 3 files changed, 135 insertions(+), 52 deletions(-) create mode 100644 test/as_quadratic_projection.jl diff --git a/src/active_set_quadratic_direct_solve.jl b/src/active_set_quadratic_direct_solve.jl index fef53a077..c90ab0a8e 100644 --- a/src/active_set_quadratic_direct_solve.jl +++ b/src/active_set_quadratic_direct_solve.jl @@ -262,8 +262,8 @@ Solves the auxiliary LP over the current active set. The method is specialized by type `H` of the Hessian matrix `A`. 
""" function solve_quadratic_activeset_lp!( - as::ActiveSetQuadraticLinearSolve{AT,R,IT,<:AbstractMatrix}, -) where {AT,R,IT} + as::ActiveSetQuadraticLinearSolve{AT,R,IT,H}, +) where {AT,R,IT,H} nv = length(as) o = as.lp_optimizer MOI.empty!(o) @@ -278,7 +278,10 @@ function solve_quadratic_activeset_lp!( Base.sizehint!(lhs.terms, nv) # replaces direct sum because of MOI and MutableArithmetic slow sums for j in 1:nv - push!(lhs.terms, MOI.ScalarAffineTerm(fast_dot(atom, as.A, as.atoms[j]), λ[j])) + push!( + lhs.terms, + _compute_quadratic_constraint_term(atom, as.A, as.atoms[j], λ[j]), + ) end rhs = -dot(atom, as.b) MOI.add_constraint(o, lhs, MOI.EqualTo(rhs)) @@ -293,7 +296,7 @@ function solve_quadratic_activeset_lp!( new_weights = R[] for idx in eachindex(λ) weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) - if weight_value <= 1e-5 + if weight_value <= 2 * weight_purge_threshold_default(typeof(weight_value)) push!(indices_to_remove, idx) else push!(new_weights, weight_value) @@ -308,55 +311,12 @@ function solve_quadratic_activeset_lp!( return as end -# special case of scaled identity Hessian +function _compute_quadratic_constraint_term(atom1, A::AbstractMatrix, atom2, λ) + return MOI.ScalarAffineTerm(fast_dot(atom1, A, atom2), λ) +end -function solve_quadratic_activeset_lp!( - as::ActiveSetQuadraticLinearSolve{AT,R,IT,<:Union{Identity,LinearAlgebra.UniformScaling}}, -) where {AT,R,IT} - hessian_scaling = as.A.λ - nv = length(as) - o = as.lp_optimizer - MOI.empty!(o) - λ = MOI.add_variables(o, nv) - # λ ≥ 0, ∑ λ == 1 - MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0)) - sum_of_variables = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0) - MOI.add_constraint(o, sum_of_variables, MOI.EqualTo(1.0)) - # a Aᵗ A λ == -Aᵗ b - for atom in as.atoms - lhs = MOI.ScalarAffineFunction{Float64}([], 0.0) - Base.sizehint!(lhs.terms, nv) - # replaces direct sum because of MOI and MutableArithmetic slow sums - for j in 1:nv - push!(lhs.terms, MOI.ScalarAffineTerm(hessian_scaling * dot(atom, as.atoms[j]), λ[j])) - end - rhs = -dot(atom, as.b) - MOI.add_constraint(o, lhs, MOI.EqualTo(rhs)) - end - MOI.set(o, MOI.ObjectiveFunction{typeof(sum_of_variables)}(), sum_of_variables) - MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE) - MOI.optimize!(o) - if MOI.get(o, MOI.TerminationStatus()) ∉ (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) - return as - end - indices_to_remove = Int[] - new_weights = R[] - for idx in eachindex(λ) - weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) - if weight_value <= 2 * weight_purge_threshold_default(typeof(weight_value)) - push!(indices_to_remove, idx) - else - push!(new_weights, weight_value) - end - end - deleteat!(as.active_set, indices_to_remove) - @assert length(as) == length(new_weights) - update_weights!(as.active_set, new_weights) - active_set_cleanup!(as) - @assert all(>=(0), new_weights) - active_set_renormalize!(as) - compute_active_set_iterate!(as) - return as +function _compute_quadratic_constraint_term(atom1, A::Union{Identity,LinearAlgebra.UniformScaling}, atom2, λ) + return MOI.ScalarAffineTerm(A.λ * fast_dot(atom1, atom2), λ) end struct LogScheduler{T} diff --git a/test/as_quadratic_projection.jl b/test/as_quadratic_projection.jl new file mode 100644 index 000000000..2060d1997 --- /dev/null +++ b/test/as_quadratic_projection.jl @@ -0,0 +1,116 @@ +using FrankWolfe +using LinearAlgebra +using Random + +import HiGHS +import MathOptInterface as MOI +using Test + +n = Int(1e2) +k = 10000 + +# s = rand(1:100) +s = 10 +@info "Seed $s" 
+Random.seed!(s) + + +xpi = rand(n); +total = sum(xpi); + +xp = xpi ./ total; + +f(x) = norm(x - xp)^2 +function grad!(storage, x) + @. storage = 2 * (x - xp) +end + +# lmo = FrankWolfe.KSparseLMO(5, 1000.0) + +## other LMOs to try +# lmo_big = FrankWolfe.KSparseLMO(100, big"1.0") +# lmo = FrankWolfe.LpNormLMO{Float64,5}(100.0) +# lmo = FrankWolfe.ProbabilitySimplexOracle(100.0); +lmo = FrankWolfe.UnitSimplexOracle(10000.0); + +x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) + return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set))) + end +end + + +trajectoryBPCG_standard = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + callback=build_callback(trajectoryBPCG_standard), +); + + +active_set_quadratic_automatic_standard = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([(1.0, copy(x00))]), + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=1), +) +trajectoryBPCG_quadratic_automatic_standard = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic_standard, + max_iteration=k, + callback=build_callback(trajectoryBPCG_quadratic_automatic_standard), +); + +active_set_quadratic_automatic = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=1), +) +trajectoryBPCG_quadratic_automatic = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic, + max_iteration=k, + callback=build_callback(trajectoryBPCG_quadratic_automatic), +); + +active_set_quadratic_manual = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([(1.0, copy(x00))]), + 2.0 * I, -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=1), +) +trajectoryBPCG_quadratic_manual = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_manual, + max_iteration=k, + callback=build_callback(trajectoryBPCG_quadratic_manual), +); + +traj_data = [ + trajectoryBPCG_standard, + trajectoryBPCG_quadratic_automatic_standard, + trajectoryBPCG_quadratic_automatic, + trajectoryBPCG_quadratic_manual, +] + +# all should have converged +for traj in traj_data + @test traj[end][2] ≤ 1e-8 + @test traj[end][4] ≤ 1e-7 +end diff --git a/test/runtests.jl b/test/runtests.jl index 40f5ac366..112582329 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -805,6 +805,13 @@ using Test end end +module LpDirectSolveTestProjection +using Test +@testset "LP solving for quadratic functions and active set" begin + include("as_quadratic_projection.jl") +end +end + module SparsifyingActiveSetTest using Test @testset "Sparsifying active set" begin From e805431823dbe5ec513ef8430fa79232d131a7dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Designolle?= <55443987+sebastiendesignolle@users.noreply.github.com> Date: Sun, 3 Nov 2024 08:45:05 +0100 Subject: [PATCH 14/21] Refactor ActiveSetQuadratic and 
SymmetricXXX (#524) * ActiveSetQuadratic -> ActiveSetQuadraticCachedProducts * SymmetricLMO -> SubspaceLMO * reduce -> deflate * SymmetricArray -> SubspaceVector * Cap length of test strings to 50 characters * Suppress verbosity * Fix docstrings for Subspace LMO & Vector * ActiveSetQuadraticCachedProducts -> ActiveSetQuadraticProductCaching --- docs/src/advanced.md | 4 +- examples/birkhoff_polytope.jl | 2 +- examples/birkhoff_polytope_symmetric.jl | 32 +++---- examples/blended_pairwise_with_direct.jl | 4 +- examples/docs_12_quadratic_symmetric.jl | 52 ++++++------ examples/quadratic.jl | 2 +- examples/quadratic_A.jl | 2 +- examples/reynolds.jl | 4 +- examples/symmetric.jl | 14 ++-- src/abstract_oracles.jl | 25 +++--- src/active_set_quadratic.jl | 50 +++++------ src/active_set_quadratic_direct_solve.jl | 6 +- src/pairwise.jl | 6 +- src/types.jl | 71 ++++++++-------- test/active_set.jl | 12 +-- test/active_set_variants.jl | 8 +- test/alternating_methods_tests.jl | 18 ++-- test/as_quadratic_projection.jl | 2 +- test/bcg_direction_error.jl | 4 +- test/benchmark.jl | 1 - test/blocks.jl | 4 +- test/extra_storage.jl | 6 +- test/function_gradient.jl | 2 +- test/generic-arrays.jl | 10 +-- test/lmo.jl | 44 +++++----- test/momentum_memory.jl | 4 +- test/oddities.jl | 14 ++-- test/quadratic_lp_active_set.jl | 6 +- test/rational_test.jl | 4 +- test/runtests.jl | 83 +++++++++---------- test/sparsifying_activeset.jl | 2 +- test/timeout.jl | 6 +- test/tracking.jl | 8 +- test/trajectory_tests/open_loop_parametric.jl | 8 +- test/utils.jl | 12 +-- 35 files changed, 271 insertions(+), 261 deletions(-) diff --git a/docs/src/advanced.md b/docs/src/advanced.md index a404fad85..ea438ca74 100644 --- a/docs/src/advanced.md +++ b/docs/src/advanced.md @@ -53,7 +53,7 @@ See [Extra-lazification](@ref) for a complete example. ## Specialized active set for quadratic functions -If the objective function is quadratic, a considerable speedup can be obtained by using the structure `ActiveSetQuadratic`. +If the objective function is quadratic, a considerable speedup can be obtained by using the structure `ActiveSetQuadraticProductCaching`. It relies on the storage of various scalar products to efficiently determine the best (and worst for `blended_pairwise_conditional_gradient`) atom in the active set without the need of computing many scalar products in each iteration. The user should provide the Hessian matrix `A` as well as the linear part `b` of the function, such that: ```math @@ -228,6 +228,6 @@ This subspace is the image of the Reynolds operator defined by \mathcal{R}(x)=\frac{1}{|G|}\sum_{g\in G}g\cdot x. ``` -In practice, the type `SymmetricLMO` allows the user to provide the Reynolds operator $\mathcal{R}$ as well as its adjoint $\mathcal{R}^\ast$. +In practice, the type `SubspaceLMO` allows the user to provide the Reynolds operator $\mathcal{R}$ as well as its adjoint $\mathcal{R}^\ast$. The gradient is symmetrised with $\mathcal{R}^\ast$, then passed to the non-symmetric LMO, and the resulting output is symmetrised with $\mathcal{R}$. In many cases, the gradient is already symmetric so that `reynolds_adjoint(gradient, lmo) = gradient` is a fast and valid choice. 
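
To make the deflation mechanism concrete, here is a minimal standalone Julia sketch (an illustration only, not part of the patch) for the group {identity, transpose}: the Reynolds operator averages a matrix with its transpose, its adjoint can be taken as the identity because only symmetric gradients are passed in, and the inner nuclear-norm LMO is an arbitrary placeholder choice:

using FrankWolfe, LinearAlgebra

reynolds(v, lmo) = (v + v') / 2  # R(x) = (x + xᵀ)/2 projects onto symmetric matrices
reynolds_adjoint(g, lmo) = g     # valid here since the gradients passed in are symmetric

lmo_inner = FrankWolfe.NuclearNormLMO(1.0)  # placeholder inner LMO over matrices
lmo_sym = FrankWolfe.SubspaceLMO(lmo_inner, reynolds, reynolds_adjoint)

A = randn(5, 5)
v = FrankWolfe.compute_extreme_point(lmo_sym, (A + A') / 2)
v ≈ v'  # true: the returned atom lies in the symmetric subspace
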
diff --git a/examples/birkhoff_polytope.jl b/examples/birkhoff_polytope.jl index e181ba69b..c81771ca7 100644 --- a/examples/birkhoff_polytope.jl +++ b/examples/birkhoff_polytope.jl @@ -54,7 +54,7 @@ FrankWolfe.benchmark_oracles( x -> cf(x, xp, normxp2), (str, x) -> cgrad!(str, x, xp), lmo, - FrankWolfe.ActiveSetQuadratic([(1.0, collect(x0))], 2I/n^2, -2xp/n^2); # surprisingly faster and more memory efficient with collect + FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, collect(x0))], 2I/n^2, -2xp/n^2); # surprisingly faster and more memory efficient with collect max_iteration=k, line_search=FrankWolfe.Shortstep(2/n^2), lazy=true, diff --git a/examples/birkhoff_polytope_symmetric.jl b/examples/birkhoff_polytope_symmetric.jl index db1c79a55..8e1c3933d 100644 --- a/examples/birkhoff_polytope_symmetric.jl +++ b/examples/birkhoff_polytope_symmetric.jl @@ -36,7 +36,7 @@ x0 = FrankWolfe.compute_extreme_point(lmo_nat, randn(n, n)) x -> cf(x, xp, normxp2), (str, x) -> cgrad!(str, x, xp), lmo_nat, - FrankWolfe.ActiveSetQuadratic([(1.0, x0)], 2I/n^2, -2xp/n^2); + FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, x0)], 2I/n^2, -2xp/n^2); max_iteration=k, line_search=FrankWolfe.Shortstep(2/n^2), lazy=true, @@ -48,15 +48,15 @@ x0 = FrankWolfe.compute_extreme_point(lmo_nat, randn(n, n)) # here the problem is invariant under mirror symmetry around the diagonal and the anti-diagonal # each solution of the LMO can then be added to the active set together with its orbit # on top of that, the effective dimension of the space is reduced -# the following function constructs the functions `reduce` and `inflate` needed for SymmetricLMO -# `reduce` maps a matrix to the invariant vector space +# the following function constructs the functions `deflate` and `inflate` needed for SubspaceLMO +# `deflate` maps a matrix to the invariant vector space # `inflate` maps a vector in this space back to a matrix -# using `FrankWolfe.SymmetricArray` is a convenience to avoid reallocating the result of `inflate` -function build_reduce_inflate(p::Matrix{T}) where {T <: Number} +# using `FrankWolfe.SubspaceVector` is a convenience to avoid reallocating the result of `inflate` +function build_deflate_inflate(p::Matrix{T}) where {T <: Number} n = size(p, 1) @assert n == size(p, 2) # square matrix - dimension = floor(Int, (n+1)^2 / 4) # reduced dimension - function reduce(A::AbstractMatrix{T}, lmo) + dimension = floor(Int, (n+1)^2 / 4) # deflated dimension + function deflate(A::AbstractMatrix{T}, lmo) vec = Vector{T}(undef, dimension) cnt = 0 @inbounds for i in 1:(n+1)÷2, j in i:n+1-i @@ -75,9 +75,9 @@ function build_reduce_inflate(p::Matrix{T}) where {T <: Number} end end end - return FrankWolfe.SymmetricArray(A, vec) + return FrankWolfe.SubspaceVector(A, vec) end - function inflate(x::FrankWolfe.SymmetricArray, lmo) + function inflate(x::FrankWolfe.SubspaceVector, lmo) cnt = 0 @inbounds for i in 1:(n+1)÷2, j in i:n+1-i cnt += 1 @@ -102,22 +102,22 @@ function build_reduce_inflate(p::Matrix{T}) where {T <: Number} end return x.data end - return reduce, inflate + return deflate, inflate end -reduce, inflate = build_reduce_inflate(xpi) -const rxp = reduce(xpi, nothing) -@assert dot(rxp, rxp) ≈ normxp2 # should be correct thanks to the factors sqrt(2) and 2 in reduce and inflate +deflate, inflate = build_deflate_inflate(xpi) +const rxp = deflate(xpi, nothing) +@assert dot(rxp, rxp) ≈ normxp2 # should be correct thanks to the factors sqrt(2) and 2 in deflate and inflate -lmo_sym = FrankWolfe.SymmetricLMO(lmo_nat, reduce, inflate) 
+lmo_sym = FrankWolfe.SubspaceLMO(lmo_nat, deflate, inflate)
 
-rx0 = FrankWolfe.compute_extreme_point(lmo_sym, reduce(sparse(randn(n, n)), nothing))
+rx0 = FrankWolfe.compute_extreme_point(lmo_sym, deflate(sparse(randn(n, n)), nothing))
 
 @time rx, rv, rprimal, rdual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     x -> cf(x, rxp, normxp2),
     (str, x) -> cgrad!(str, x, rxp),
     lmo_sym,
-    FrankWolfe.ActiveSetQuadratic([(1.0, rx0)], 2I/n^2, -2rxp/n^2);
+    FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, rx0)], 2I/n^2, -2rxp/n^2);
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2/n^2),
     lazy=true,
diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl
index 9a30bde76..828436201 100644
--- a/examples/blended_pairwise_with_direct.jl
+++ b/examples/blended_pairwise_with_direct.jl
@@ -63,7 +63,7 @@ trajectoryBPCG_standard = []
 
 # Just projection quadratic
 trajectoryBPCG_quadratic = []
-as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp)
+as_quad = FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp)
 @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     f,
     grad!,
@@ -75,7 +75,7 @@ as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I,
     callback=build_callback(trajectoryBPCG_quadratic),
 );
 
-as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp)
+as_quad = FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp)
 
 # with quadratic active set
 trajectoryBPCG_quadratic_as = []
diff --git a/examples/docs_12_quadratic_symmetric.jl b/examples/docs_12_quadratic_symmetric.jl
index 82ca5bd12..160c7e86e 100644
--- a/examples/docs_12_quadratic_symmetric.jl
+++ b/examples/docs_12_quadratic_symmetric.jl
@@ -1,7 +1,7 @@
 # # Accelerations for quadratic functions and symmetric problems
 
-# This example illustrates how to exploit symmetry to reduce the dimension of the problem via `SymmetricLMO`.
-# Moreover, active set based algorithms can be accelerated by using the specialized structure `ActiveSetQuadratic`.
+# This example illustrates how to exploit symmetry to reduce the dimension of the problem via `SubspaceLMO`.
+# Moreover, active set based algorithms can be accelerated by using the specialized structure `ActiveSetQuadraticProductCaching`.
 # The specific problem we consider here comes from quantum information and some context can be found [here](https://arxiv.org/abs/2302.04721).
 
 # Formally, we want to find the distance between a tensor of size `m^N` and the `N`-partite local polytope which is defined by its vertices
@@ -124,11 +124,11 @@ println() #hide
 
 # A first acceleration can be obtained by using the active set specialized for the quadratic objective function,
 # whose gradient is here ``x-p``, explaining the hessian and linear part provided as arguments.
-# The speedup is obtained by pre-computing some scalar products to quickly obtained, in each iteration, the best and worst
-# atoms currently in the active set.
+# The speedup is obtained by pre-computing some scalar products to quickly obtain, in each iteration,
+# the best and worst atoms currently in the active set.
-FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_naive, FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide -asq_naive = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p) +FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_naive, FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide +asq_naive = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p) @time FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_naive, asq_naive; verbose, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration) println() #hide @@ -165,9 +165,9 @@ println() #hide # a very small speedup by precomputing and storing `Combinatorics.permutations(1:N)` # in a dedicated field of our custom LMO. -lmo_permutedims = FrankWolfe.SymmetricLMO(lmo_naive, reynolds_permutedims) -FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_permutedims, FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide -asq_permutedims = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p) +lmo_permutedims = FrankWolfe.SubspaceLMO(lmo_naive, reynolds_permutedims) +FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_permutedims, FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide +asq_permutedims = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p) @time FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_permutedims, asq_permutedims; verbose, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration) println() #hide @@ -197,9 +197,9 @@ function build_reynolds_unique(p::Array{T, N}) where {T <: Number, N} end end -lmo_unique = FrankWolfe.SymmetricLMO(lmo_naive, build_reynolds_unique(p)) -FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_unique, FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide -asq_unique = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p) +lmo_unique = FrankWolfe.SubspaceLMO(lmo_naive, build_reynolds_unique(p)) +FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_unique, FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide +asq_unique = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p) @time FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_unique, asq_unique; verbose, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration) println() #hide @@ -214,7 +214,7 @@ println() #hide # accelerations, especially for active set based algorithms in the regime where many lazy iterations are performed. # We refer to the example `symmetric.jl` for a small benchmark with symmetric matrices. 
-function build_reduce_inflate(p::Array{T, N}) where {T <: Number, N} +function build_deflate_inflate(p::Array{T, N}) where {T <: Number, N} ptol = round.(p; digits=8) ptol[ptol .== zero(T)] .= zero(T) # transform -0.0 into 0.0 as isequal(0.0, -0.0) is false uniquetol = unique(ptol[:]) @@ -227,8 +227,8 @@ function build_reduce_inflate(p::Array{T, N}) where {T <: Number, N} for (i, ind) in enumerate(indices) vec[i] = sum(A[ind]) / sqmul[i] end - return FrankWolfe.SymmetricArray(A, vec) - end, function(x::FrankWolfe.SymmetricArray, lmo) + return FrankWolfe.SubspaceVector(A, vec) + end, function(x::FrankWolfe.SubspaceVector, lmo) for (i, ind) in enumerate(indices) @view(x.data[ind]) .= x.vec[i] / sqmul[i] end @@ -236,16 +236,16 @@ function build_reduce_inflate(p::Array{T, N}) where {T <: Number, N} end end -reduce, inflate = build_reduce_inflate(p) -p_reduce = reduce(p, nothing) -x0_reduce = reduce(x0, nothing) -f_reduce = let p_reduce = p_reduce, normp2 = normp2 - x -> LinearAlgebra.dot(x, x) / 2 - LinearAlgebra.dot(p_reduce, x) + normp2 +deflate, inflate = build_deflate_inflate(p) +p_deflate = deflate(p, nothing) +x0_deflate = deflate(x0, nothing) +f_deflate = let p_deflate = p_deflate, normp2 = normp2 + x -> LinearAlgebra.dot(x, x) / 2 - LinearAlgebra.dot(p_deflate, x) + normp2 end -grad_reduce! = let p_reduce = p_reduce +grad_deflate! = let p_deflate = p_deflate (storage, x) -> begin @inbounds for i in eachindex(x) - storage[i] = x[i] - p_reduce[i] + storage[i] = x[i] - p_deflate[i] end end end @@ -255,9 +255,9 @@ println() #hide # In this simple example, their shape remains unchanged, but in general this may need some # reformulation, which falls to the user. -lmo_reduce = FrankWolfe.SymmetricLMO(lmo_naive, reduce, inflate) -FrankWolfe.blended_pairwise_conditional_gradient(f_reduce, grad_reduce!, lmo_reduce, FrankWolfe.ActiveSetQuadratic([(one(T), x0_reduce)], LinearAlgebra.I, -p_reduce); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide -asq_reduce = FrankWolfe.ActiveSetQuadratic([(one(T), x0_reduce)], LinearAlgebra.I, -p_reduce) -@time FrankWolfe.blended_pairwise_conditional_gradient(f_reduce, grad_reduce!, lmo_reduce, asq_reduce; verbose, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration) +lmo_deflate = FrankWolfe.SubspaceLMO(lmo_naive, deflate, inflate) +FrankWolfe.blended_pairwise_conditional_gradient(f_deflate, grad_deflate!, lmo_deflate, FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0_deflate)], LinearAlgebra.I, -p_deflate); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide +asq_deflate = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0_deflate)], LinearAlgebra.I, -p_deflate) +@time FrankWolfe.blended_pairwise_conditional_gradient(f_deflate, grad_deflate!, lmo_deflate, asq_deflate; verbose, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration) println() #hide diff --git a/examples/quadratic.jl b/examples/quadratic.jl index 923fbc4f3..450ea0901 100644 --- a/examples/quadratic.jl +++ b/examples/quadratic.jl @@ -74,7 +74,7 @@ function benchmark_Bell(p::Array{T, 2}, quadratic::Bool; fw_method=FrankWolfe.bl lmo = BellCorrelationsLMOHeuristic{T}(size(p, 1), zeros(T, size(p, 1))) x0 = FrankWolfe.compute_extreme_point(lmo, -p) if quadratic - active_set = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], I, -p) + active_set = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], I, -p) else active_set = FrankWolfe.ActiveSet([(one(T), x0)]) end diff --git 
a/examples/quadratic_A.jl b/examples/quadratic_A.jl index 5ac879c47..c65643a82 100644 --- a/examples/quadratic_A.jl +++ b/examples/quadratic_A.jl @@ -48,7 +48,7 @@ x0 = FrankWolfe.compute_extreme_point(lmo, zeros(T, n+1)) # active_set = FrankWolfe.ActiveSet([(1.0, x0)]) # specialized active set, automatically detecting the parameters A and b of the quadratic function f -active_set = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], gradf) +active_set = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], gradf) @time res = FrankWolfe.blended_pairwise_conditional_gradient( # @time res = FrankWolfe.away_frank_wolfe( diff --git a/examples/reynolds.jl b/examples/reynolds.jl index fe28bca53..0cb6f3c32 100644 --- a/examples/reynolds.jl +++ b/examples/reynolds.jl @@ -86,12 +86,12 @@ function benchmark_Bell(p::Array{T, 3}, sym::Bool; kwargs...) where {T <: Number end lmo = BellCorrelationsLMO{T}(size(p, 1), zeros(T, size(p, 1))) if sym - lmo = FrankWolfe.SymmetricLMO(lmo, reynolds_permutedims, reynolds_adjoint) + lmo = FrankWolfe.SubspaceLMO(lmo, reynolds_permutedims, reynolds_adjoint) end x0 = FrankWolfe.compute_extreme_point(lmo, -p) println("Output type of the LMO: ", typeof(x0)) active_set = FrankWolfe.ActiveSet([(one(T), x0)]) - # active_set = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], I, -p) + # active_set = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], I, -p) return FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo, active_set; lazy=true, line_search=FrankWolfe.Shortstep(one(T)), kwargs...) end diff --git a/examples/symmetric.jl b/examples/symmetric.jl index 000ca53bc..1d1018cf9 100644 --- a/examples/symmetric.jl +++ b/examples/symmetric.jl @@ -57,7 +57,7 @@ function correlation_tensor_GHZ_polygon(N::Int, m::Int; type=Float64) return res end -function build_reduce_inflate_permutedims(p::Array{T, 2}) where {T <: Number} +function build_deflate_inflate_permutedims(p::Array{T, 2}) where {T <: Number} n = size(p, 1) @assert n == size(p, 2) dimension = (n * (n + 1)) ÷ 2 @@ -72,8 +72,8 @@ function build_reduce_inflate_permutedims(p::Array{T, 2}) where {T <: Number} vec[cnt+j] = (A[i, j] + A[j, i]) / sqrt2 end end - return FrankWolfe.SymmetricArray(A, vec) - end, function(x::FrankWolfe.SymmetricArray, lmo) + return FrankWolfe.SubspaceVector(A, vec) + end, function(x::FrankWolfe.SubspaceVector, lmo) cnt = 0 @inbounds for i in 1:n x.data[i, i] = x.vec[i] @@ -90,9 +90,9 @@ end function benchmark_Bell(p::Array{T, 2}, sym::Bool; fw_method=FrankWolfe.blended_pairwise_conditional_gradient, kwargs...) 
where {T <: Number} Random.seed!(0) if sym - reduce, inflate = build_reduce_inflate_permutedims(p) - lmo = FrankWolfe.SymmetricLMO(BellCorrelationsLMOHeuristic{T}(size(p, 1), zeros(T, size(p, 1))), reduce, inflate) - p = reduce(p, lmo) + deflate, inflate = build_deflate_inflate_permutedims(p) + lmo = FrankWolfe.SubspaceLMO(BellCorrelationsLMOHeuristic{T}(size(p, 1), zeros(T, size(p, 1))), deflate, inflate) + p = deflate(p, lmo) else lmo = BellCorrelationsLMOHeuristic{T}(size(p, 1), zeros(T, size(p, 1))) end @@ -109,7 +109,7 @@ function benchmark_Bell(p::Array{T, 2}, sym::Bool; fw_method=FrankWolfe.blended_ end end x0 = FrankWolfe.compute_extreme_point(lmo, -p) - active_set = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], I, -p) + active_set = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], I, -p) res = fw_method(f, grad!, lmo, active_set; line_search=FrankWolfe.Shortstep(one(T)), lazy=true, verbose=false, max_iteration=10^2) return fw_method(f, grad!, lmo, res[6]; line_search=FrankWolfe.Shortstep(one(T)), lazy=true, lazy_tolerance=10^6, kwargs...) end diff --git a/src/abstract_oracles.jl b/src/abstract_oracles.jl index 5b61960f0..c1c1154e4 100644 --- a/src/abstract_oracles.jl +++ b/src/abstract_oracles.jl @@ -258,23 +258,28 @@ function compute_extreme_point( end """ - SymmetricLMO{LMO, TR, TI} + SubspaceLMO{LMO, TD, TI} -Symmetric LMO for the reduction operator defined by `TR` +Subspace LMO for the deflation operator defined by `TD` and the inflation operator defined by `TI`. -Computations are performed in the reduced subspace, and the +Computations are performed in the deflated subspace, and the effective call of the LMO first inflates the gradient, then -use the non-symmetric LMO, and finally reduces the output. +uses the full LMO, and finally deflates the output. + +Deflation operators typically project onto symmetric subspaces +or select relevant elements of a full tensor. +Scalar products should be treated carefully to ensure correctness +of the results; see also the companion `SubspaceVector` structure. """ -struct SymmetricLMO{LMO<:LinearMinimizationOracle,TR,TI} <: LinearMinimizationOracle +struct SubspaceLMO{LMO<:LinearMinimizationOracle,TD,TI} <: LinearMinimizationOracle lmo::LMO - reduce::TR + deflate::TD inflate::TI - function SymmetricLMO(lmo::LMO, reduce, inflate=(x, lmo) -> x) where {LMO<:LinearMinimizationOracle} - return new{typeof(lmo),typeof(reduce),typeof(inflate)}( lmo, reduce, inflate) + function SubspaceLMO(lmo::LMO, deflate, inflate=(x, lmo) -> x) where {LMO<:LinearMinimizationOracle} + return new{typeof(lmo),typeof(deflate),typeof(inflate)}( lmo, deflate, inflate) end end -function compute_extreme_point(sym::SymmetricLMO, direction; kwargs...) - return sym.reduce(compute_extreme_point(sym.lmo, sym.inflate(direction, sym.lmo)), sym.lmo) +function compute_extreme_point(sym::SubspaceLMO, direction; kwargs...) + return sym.deflate(compute_extreme_point(sym.lmo, sym.inflate(direction, sym.lmo)), sym.lmo) end diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index 08f8e4e96..0b19d7dba 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -1,6 +1,6 @@ """ - ActiveSetQuadratic{AT, R, IT} + ActiveSetQuadraticProductCaching{AT, R, IT} Represents an active set of extreme vertices collected in a FW algorithm, along with their coefficients `(λ_i, a_i)`. @@ -9,7 +9,7 @@ The iterate `x = ∑λ_i a_i` is stored in x with type `IT`. 
The objective function is assumed to be of the form `f(x)=½⟨x,Ax⟩+⟨b,x⟩+c` so that the gradient is simply `∇f(x)=Ax+b`. """ -struct ActiveSetQuadratic{AT, R <: Real, IT, H} <: AbstractActiveSet{AT,R,IT} +struct ActiveSetQuadraticProductCaching{AT, R <: Real, IT, H} <: AbstractActiveSet{AT,R,IT} weights::Vector{R} atoms::Vector{AT} x::IT @@ -48,21 +48,21 @@ function detect_quadratic_function(grad!, x0; test=true) return A, b end -function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function) where {AT,R} +function ActiveSetQuadraticProductCaching(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function) where {AT,R} A, b = detect_quadratic_function(grad!, tuple_values[1][2]) - return ActiveSetQuadratic(tuple_values, A, b) + return ActiveSetQuadraticProductCaching(tuple_values, A, b) end -function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b) where {AT,R,H} - return ActiveSetQuadratic{AT,R}(tuple_values, A, b) +function ActiveSetQuadraticProductCaching(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b) where {AT,R,H} + return ActiveSetQuadraticProductCaching{AT,R}(tuple_values, A, b) end -function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function) where {AT,R} +function ActiveSetQuadraticProductCaching{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function) where {AT,R} A, b = detect_quadratic_function(grad!, tuple_values[1][2]) - return ActiveSetQuadratic{AT,R}(tuple_values, A, b) + return ActiveSetQuadraticProductCaching{AT,R}(tuple_values, A, b) end -function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b) where {AT,R,H} +function ActiveSetQuadraticProductCaching{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b) where {AT,R,H} n = length(tuple_values) weights = Vector{R}(undef, n) atoms = Vector{AT}(undef, n) @@ -76,7 +76,7 @@ function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number, atoms[idx] = tuple_values[idx][2] end x = similar(b) - as = ActiveSetQuadratic{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified) + as = ActiveSetQuadraticProductCaching{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified) reset_quadratic_dots!(as) compute_active_set_iterate!(as) return as @@ -84,7 +84,7 @@ end # should only be called upon construction # for active sets with a large number of atoms, this function becomes very costly -function reset_quadratic_dots!(as::ActiveSetQuadratic{AT,R}) where {AT,R} +function reset_quadratic_dots!(as::ActiveSetQuadraticProductCaching{AT,R}) where {AT,R} @inbounds for idx in 1:length(as) as.dots_A[idx] = Vector{R}(undef, idx) for idy in 1:idx @@ -108,16 +108,16 @@ function Base.:*(a::Identity, b) end end -function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b) where {AT,R} - return ActiveSetQuadratic(tuple_values, Identity(A.λ), b) +function ActiveSetQuadraticProductCaching(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b) where {AT,R} + return ActiveSetQuadraticProductCaching(tuple_values, Identity(A.λ), b) end -function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b) where {AT,R} - return ActiveSetQuadratic{AT,R}(tuple_values, Identity(A.λ), b) +function ActiveSetQuadraticProductCaching{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b) where {AT,R} + 
return ActiveSetQuadraticProductCaching{AT,R}(tuple_values, Identity(A.λ), b) end # these three functions do not update the active set iterate -function Base.push!(as::ActiveSetQuadratic{AT,R}, (λ, a)) where {AT,R} +function Base.push!(as::ActiveSetQuadraticProductCaching{AT,R}, (λ, a)) where {AT,R} dot_x = zero(R) dot_A = Vector{R}(undef, length(as)) dot_b = fast_dot(as.b, a) @@ -140,7 +140,7 @@ function Base.push!(as::ActiveSetQuadratic{AT,R}, (λ, a)) where {AT,R} end # TODO multi-indices version -function Base.deleteat!(as::ActiveSetQuadratic, idx::Int) +function Base.deleteat!(as::ActiveSetQuadraticProductCaching, idx::Int) @inbounds for i in 1:idx-1 as.dots_x[i] -= as.weights_prev[idx] * as.dots_A[idx][i] end @@ -158,7 +158,7 @@ function Base.deleteat!(as::ActiveSetQuadratic, idx::Int) return as end -function Base.empty!(as::ActiveSetQuadratic) +function Base.empty!(as::ActiveSetQuadraticProductCaching) empty!(as.atoms) empty!(as.weights) as.x .= 0 @@ -171,7 +171,7 @@ function Base.empty!(as::ActiveSetQuadratic) end function active_set_update!( - active_set::ActiveSetQuadratic{AT,R}, + active_set::ActiveSetQuadraticProductCaching{AT,R}, lambda, atom, renorm=true, idx=nothing; weight_purge_threshold=weight_purge_threshold_default(R), add_dropped_vertices=false, @@ -200,7 +200,7 @@ function active_set_update!( return active_set end -function active_set_renormalize!(active_set::ActiveSetQuadratic) +function active_set_renormalize!(active_set::ActiveSetQuadraticProductCaching) renorm = sum(active_set.weights) active_set.weights ./= renorm active_set.weights_prev ./= renorm @@ -209,7 +209,7 @@ function active_set_renormalize!(active_set::ActiveSetQuadratic) return active_set end -function active_set_argmin(active_set::ActiveSetQuadratic, direction) +function active_set_argmin(active_set::ActiveSetQuadraticProductCaching, direction) valm = typemax(eltype(direction)) idxm = -1 idx_modified = findall(active_set.modified) @@ -240,7 +240,7 @@ function active_set_argmin(active_set::ActiveSetQuadratic, direction) return (active_set[idxm]..., idxm) end -function active_set_argminmax(active_set::ActiveSetQuadratic, direction; Φ=0.5) +function active_set_argminmax(active_set::ActiveSetQuadraticProductCaching, direction; Φ=0.5) valm = typemax(eltype(direction)) valM = typemin(eltype(direction)) idxm = -1 @@ -281,7 +281,7 @@ function active_set_argminmax(active_set::ActiveSetQuadratic, direction; Φ=0.5) end # in-place warm-start of a quadratic active set for A and b -function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R}, A::H, b) where {AT,R,H} +function update_active_set_quadratic!(warm_as::ActiveSetQuadraticProductCaching{AT,R}, A::H, b) where {AT,R,H} @inbounds for idx in eachindex(warm_as) for idy in 1:idx warm_as.dots_A[idx][idy] = fast_dot(A * warm_as.atoms[idx], warm_as.atoms[idy]) @@ -292,7 +292,7 @@ function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R}, A::H, b end # in-place warm-start of a quadratic active set for b -function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R,IT,H}, b) where {AT,R,IT,H} +function update_active_set_quadratic!(warm_as::ActiveSetQuadraticProductCaching{AT,R,IT,H}, b) where {AT,R,IT,H} warm_as.dots_x .= 0 warm_as.weights_prev .= 0 warm_as.modified .= true @@ -304,7 +304,7 @@ function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R,IT,H}, b) return warm_as end -function update_weights!(as::ActiveSetQuadratic, new_weights) +function update_weights!(as::ActiveSetQuadraticProductCaching, new_weights) 
as.weights_prev .= as.weights as.weights .= new_weights as.modified .= true diff --git a/src/active_set_quadratic_direct_solve.jl b/src/active_set_quadratic_direct_solve.jl index c90ab0a8e..4a560377d 100644 --- a/src/active_set_quadratic_direct_solve.jl +++ b/src/active_set_quadratic_direct_solve.jl @@ -55,7 +55,7 @@ end """ ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, A, b, lp_optimizer) -Creates an `ActiveSetQuadraticLinearSolve` from the given Hessian `A`, linear term `b` and `lp_optimizer` by creating an inner `ActiveSetQuadratic` active set. +Creates an `ActiveSetQuadraticLinearSolve` from the given Hessian `A`, linear term `b` and `lp_optimizer` by creating an inner `ActiveSetQuadraticProductCaching` active set. """ function ActiveSetQuadraticLinearSolve( tuple_values::Vector{Tuple{R,AT}}, @@ -64,7 +64,7 @@ function ActiveSetQuadraticLinearSolve( lp_optimizer; scheduler=LogScheduler(), ) where {AT,R,H} - inner_as = ActiveSetQuadratic(tuple_values, A, b) + inner_as = ActiveSetQuadraticProductCaching(tuple_values, A, b) return ActiveSetQuadraticLinearSolve( inner_as.weights, inner_as.atoms, @@ -155,7 +155,7 @@ function ActiveSetQuadraticLinearSolve{AT,R}( lp_optimizer; scheduler=LogScheduler(), ) where {AT,R,H} - inner_as = ActiveSetQuadratic{AT,R}(tuple_values, A, b) + inner_as = ActiveSetQuadraticProductCaching{AT,R}(tuple_values, A, b) as = ActiveSetQuadraticLinearSolve{AT,R,typeof(x),H}( inner_as.weights, inner_as.atoms, diff --git a/src/pairwise.jl b/src/pairwise.jl index 0843d8039..5a4e7c7e9 100644 --- a/src/pairwise.jl +++ b/src/pairwise.jl @@ -261,7 +261,7 @@ function pairwise_frank_wolfe( active_set, -gamma, away_vertex, - true, + false, away_index, add_dropped_vertices=use_extra_vertex_storage, vertex_storage=extra_vertex_storage, @@ -273,9 +273,11 @@ function pairwise_frank_wolfe( end end end - # fw update + # fw update active_set_update!(active_set, gamma, fw_vertex, renorm, fw_index) end + # println(active_set.weights) + # println([atom[1] for atom in active_set.atoms]) if callback !== nothing state = CallbackState( diff --git a/src/types.jl b/src/types.jl index 0a00c2d16..e5bc9b266 100644 --- a/src/types.jl +++ b/src/types.jl @@ -256,55 +256,60 @@ Base.@propagate_inbounds function muladd_memory_mode(::InplaceEmphasis, x::Matri end """ - SymmetricArray{T, DT} + SubspaceVector{HasMultiplicities, T} +Companion structure of `SubspaceLMO` containing three fields: +- `data` is the full structure to be deflated, +- `vec` is a vector in the reduced subspace in which computations are performed, +- `mul` is only used to compute scalar products when `HasMultiplicities = true`, +which should be avoided (when possible) for performance reasons. 
""" -struct SymmetricArray{HasMultiplicities,T,DT,V<:AbstractVector{T}} <: AbstractVector{T} +struct SubspaceVector{HasMultiplicities,T,DT,V<:AbstractVector{T}} <: AbstractVector{T} data::DT # full array to be symmetrised, will generally fall out of sync wrt vec vec::V # vector representing the array mul::Vector{T} # only used for scalar products end -function SymmetricArray(data::DT, vec::V) where {DT,V<:AbstractVector{T}} where {T} - return SymmetricArray{false,T,DT,V}(data, vec, T[]) +function SubspaceVector(data::DT, vec::V) where {DT,V<:AbstractVector{T}} where {T} + return SubspaceVector{false,T,DT,V}(data, vec, T[]) end -function SymmetricArray(data::DT, vec::V, mul::Vector) where {DT,V<:AbstractVector{T}} where {T} - return SymmetricArray{true,T,DT,V}(data, vec, convert(Vector{T}, mul)) +function SubspaceVector(data::DT, vec::V, mul::Vector) where {DT,V<:AbstractVector{T}} where {T} + return SubspaceVector{true,T,DT,V}(data, vec, convert(Vector{T}, mul)) end -Base.@propagate_inbounds function Base.getindex(A::SymmetricArray, i) +Base.@propagate_inbounds function Base.getindex(A::SubspaceVector, i) @boundscheck checkbounds(A.vec, i) return @inbounds getindex(A.vec, i) end -Base.@propagate_inbounds function Base.setindex!(A::SymmetricArray, x, i) +Base.@propagate_inbounds function Base.setindex!(A::SubspaceVector, x, i) @boundscheck checkbounds(A.vec, i) return @inbounds setindex!(A.vec, x, i) end -Base.size(A::SymmetricArray) = size(A.vec) -Base.eltype(A::SymmetricArray) = eltype(A.vec) -Base.similar(A::SymmetricArray{true}) = SymmetricArray(similar(A.data), similar(A.vec), A.mul) -Base.similar(A::SymmetricArray{false}) = SymmetricArray(similar(A.data), similar(A.vec)) -Base.similar(A::SymmetricArray{true}, ::Type{T}) where {T} = SymmetricArray(similar(A.data, T), similar(A.vec, T), convert(Vector{T}, A.mul)) -Base.similar(A::SymmetricArray{false}, ::Type{T}) where {T} = SymmetricArray(similar(A.data, T), similar(A.vec, T)) -Base.collect(A::SymmetricArray{true}) = SymmetricArray(collect(A.data), collect(A.vec), A.mul) -Base.collect(A::SymmetricArray{false}) = SymmetricArray(collect(A.data), collect(A.vec)) -Base.copyto!(dest::SymmetricArray, src::SymmetricArray) = copyto!(dest.vec, src.vec) -Base.:*(scalar::Real, A::SymmetricArray{true}) = SymmetricArray(A.data, scalar * A.vec, A.mul) -Base.:*(scalar::Real, A::SymmetricArray{false}) = SymmetricArray(A.data, scalar * A.vec) -Base.:*(A::SymmetricArray, scalar::Real) = scalar * A -Base.:/(A::SymmetricArray, scalar::Real) = inv(scalar) * A -Base.:+(A1::SymmetricArray{true,T}, A2::SymmetricArray{true,T}) where {T} = SymmetricArray(A1.data, A1.vec + A2.vec, A1.mul) -Base.:+(A1::SymmetricArray{false,T}, A2::SymmetricArray{false,T}) where {T} = SymmetricArray(A1.data, A1.vec + A2.vec) -Base.:-(A1::SymmetricArray{true,T}, A2::SymmetricArray{true,T}) where {T} = SymmetricArray(A1.data, A1.vec - A2.vec, A1.mul) -Base.:-(A1::SymmetricArray{false,T}, A2::SymmetricArray{false,T}) where {T} = SymmetricArray(A1.data, A1.vec - A2.vec) -Base.:-(A::SymmetricArray{true,T}) where {T} = SymmetricArray(A.data, -A.vec, A.mul) -Base.:-(A::SymmetricArray{false,T}) where {T} = SymmetricArray(A.data, -A.vec) - -LinearAlgebra.dot(A1::SymmetricArray{true}, A2::SymmetricArray{true}) = dot(A1.vec, Diagonal(A1.mul), A2.vec) -LinearAlgebra.dot(A1::SymmetricArray{false}, A2::SymmetricArray{false}) = dot(A1.vec, A2.vec) -LinearAlgebra.norm(A::SymmetricArray) = sqrt(dot(A, A)) - -Base.@propagate_inbounds Base.isequal(A1::SymmetricArray, A2::SymmetricArray) = 
isequal(A1.vec, A2.vec) +Base.size(A::SubspaceVector) = size(A.vec) +Base.eltype(A::SubspaceVector) = eltype(A.vec) +Base.similar(A::SubspaceVector{true}) = SubspaceVector(similar(A.data), similar(A.vec), A.mul) +Base.similar(A::SubspaceVector{false}) = SubspaceVector(similar(A.data), similar(A.vec)) +Base.similar(A::SubspaceVector{true}, ::Type{T}) where {T} = SubspaceVector(similar(A.data, T), similar(A.vec, T), convert(Vector{T}, A.mul)) +Base.similar(A::SubspaceVector{false}, ::Type{T}) where {T} = SubspaceVector(similar(A.data, T), similar(A.vec, T)) +Base.collect(A::SubspaceVector{true}) = SubspaceVector(collect(A.data), collect(A.vec), A.mul) +Base.collect(A::SubspaceVector{false}) = SubspaceVector(collect(A.data), collect(A.vec)) +Base.copyto!(dest::SubspaceVector, src::SubspaceVector) = copyto!(dest.vec, src.vec) +Base.:*(scalar::Real, A::SubspaceVector{true}) = SubspaceVector(A.data, scalar * A.vec, A.mul) +Base.:*(scalar::Real, A::SubspaceVector{false}) = SubspaceVector(A.data, scalar * A.vec) +Base.:*(A::SubspaceVector, scalar::Real) = scalar * A +Base.:/(A::SubspaceVector, scalar::Real) = inv(scalar) * A +Base.:+(A1::SubspaceVector{true,T}, A2::SubspaceVector{true,T}) where {T} = SubspaceVector(A1.data, A1.vec + A2.vec, A1.mul) +Base.:+(A1::SubspaceVector{false,T}, A2::SubspaceVector{false,T}) where {T} = SubspaceVector(A1.data, A1.vec + A2.vec) +Base.:-(A1::SubspaceVector{true,T}, A2::SubspaceVector{true,T}) where {T} = SubspaceVector(A1.data, A1.vec - A2.vec, A1.mul) +Base.:-(A1::SubspaceVector{false,T}, A2::SubspaceVector{false,T}) where {T} = SubspaceVector(A1.data, A1.vec - A2.vec) +Base.:-(A::SubspaceVector{true,T}) where {T} = SubspaceVector(A.data, -A.vec, A.mul) +Base.:-(A::SubspaceVector{false,T}) where {T} = SubspaceVector(A.data, -A.vec) + +LinearAlgebra.dot(A1::SubspaceVector{true}, A2::SubspaceVector{true}) = dot(A1.vec, Diagonal(A1.mul), A2.vec) +LinearAlgebra.dot(A1::SubspaceVector{false}, A2::SubspaceVector{false}) = dot(A1.vec, A2.vec) +LinearAlgebra.norm(A::SubspaceVector) = sqrt(dot(A, A)) + +Base.@propagate_inbounds Base.isequal(A1::SubspaceVector, A2::SubspaceVector) = isequal(A1.vec, A2.vec) diff --git a/test/active_set.jl b/test/active_set.jl index 101b17d07..29c98f62a 100644 --- a/test/active_set.jl +++ b/test/active_set.jl @@ -4,7 +4,7 @@ using FrankWolfe import FrankWolfe: ActiveSet using LinearAlgebra: norm -@testset "Active sets" begin +@testset "Active sets " begin @testset "Constructors and eltypes" begin active_set = ActiveSet([(0.1, [1, 2, 3]), (0.9, [2, 3, 4]), (0.0, [5, 6, 7])]) @@ -100,7 +100,7 @@ using LinearAlgebra: norm end end -@testset "Simplex gradient descent" begin +@testset "Simplex gradient descent " begin # Gradient descent over a 2-D unit simplex # each atom is a vertex, direction points to [1,1] # note: integers for atom element types @@ -175,7 +175,7 @@ end end end -@testset "LP separation oracle" begin +@testset "LP separation oracle " begin # Gradient descent over a L-inf ball of radius one # current active set contains 3 vertices # direction points to [1,1] @@ -212,7 +212,7 @@ end ) end -@testset "Argminmax" begin +@testset "Argminmax " begin active_set = FrankWolfe.ActiveSet([(0.6, [-1, -1]), (0.2, [0, 1]), (0.2, [1, 0])]) (λ_min, a_min, i_min, val, λ_max, a_max, i_max, valM, progress) = FrankWolfe.active_set_argminmax(active_set, [1, 1.5]) @@ -221,7 +221,7 @@ end @test_throws ErrorException FrankWolfe.active_set_argminmax(active_set, [NaN, NaN]) end -@testset "LPseparationWithScaledHotVector" begin +@testset 
"LPseparationWithScaledHotVector " begin v1 = FrankWolfe.ScaledHotVector(1, 1, 2) v2 = FrankWolfe.ScaledHotVector(1, 2, 2) v3 = FrankWolfe.ScaledHotVector(0, 2, 2) @@ -240,7 +240,7 @@ end ) end -@testset "ActiveSet for BigFloat" begin +@testset "ActiveSet for BigFloat " begin n = Int(1e2) lmo = FrankWolfe.LpNormLMO{BigFloat,1}(rand()) x0 = Vector(FrankWolfe.compute_extreme_point(lmo, zeros(n))) diff --git a/test/active_set_variants.jl b/test/active_set_variants.jl index 59b4a1cad..c191389f7 100644 --- a/test/active_set_variants.jl +++ b/test/active_set_variants.jl @@ -9,7 +9,7 @@ function test_callback(state, active_set, args...) @test grad0 ≈ state.gradient end -@testset "Testing active set Frank-Wolfe variants, BPFW, AFW, and PFW, including their lazy versions" begin +@testset "Testing active set based Frank-Wolfe variants " begin f(x) = norm(x)^2 function grad!(storage, x) @. storage = 2x @@ -134,7 +134,7 @@ end ) end -@testset "recompute or not last vertex" begin +@testset "Recompute or not last vertex " begin n = 10 f(x) = norm(x)^2 function grad!(storage, x) @@ -171,7 +171,7 @@ end @test lmo.counter == prev_counter - 1 end -@testset "Quadratic active set" begin +@testset "Quadratic active set " begin n = 2 # number of dimensions p = 10 # number of points function simple_reg_loss(θ, data_point) @@ -218,7 +218,7 @@ end end lmo = FrankWolfe.LpNormLMO{Float64, 2}(1.05 * norm(params_perfect)) x0 = FrankWolfe.compute_extreme_point(lmo, zeros(Float64, n+1)) - active_set = FrankWolfe.ActiveSetQuadratic([(1.0, x0)], gradf) + active_set = FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, x0)], gradf) res = FrankWolfe.blended_pairwise_conditional_gradient( f, gradf, diff --git a/test/alternating_methods_tests.jl b/test/alternating_methods_tests.jl index 7bf9487a6..ddbd2bb8a 100644 --- a/test/alternating_methods_tests.jl +++ b/test/alternating_methods_tests.jl @@ -19,7 +19,7 @@ lmo1 = FrankWolfe.ScaledBoundLInfNormBall(-ones(n), zeros(n)) lmo2 = FrankWolfe.ScaledBoundLInfNormBall(zeros(n), ones(n)) lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) -@testset "Testing alternating linear minimization with block coordinate FW for different LMO-pairs " begin +@testset "Testing ALM with block-coordinate FW " begin x, _ = FrankWolfe.alternating_linear_minimization( FrankWolfe.block_coordinate_frank_wolfe, @@ -102,7 +102,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) (storage, x) -> storage .= zero(x), (lmo1, lmo3), ones(n), - verbose=true, + verbose=false, line_search=FrankWolfe.Shortstep(2), ) @@ -116,7 +116,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) (lmo1, lmo_prob), ones(n), trajectory=true, - verbose=true, + verbose=false, ) @test abs(x.blocks[1][1]) < 1e-6 @@ -142,10 +142,10 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) end -@testset "Testing different update orders for block coordinate FW in within alternating linear minimization" begin +@testset "Testing update orders for block-coordinate ALM-FW " begin orders = [ - FrankWolfe.FullUpdate(), + FrankWolfe.FullUpdate(), [FrankWolfe.CyclicUpdate(i) for i in [-1, 1, 2]]..., [FrankWolfe.StochasticUpdate(i) for i in [-1, 1, 2]]..., [FrankWolfe.DualGapOrder(i) for i in [-1, 1, 2]]..., @@ -168,7 +168,7 @@ end end end -@testset "Testing alternating linear minimization with different FW methods" begin +@testset "Testing ALM with different FW methods " begin methods = [ FrankWolfe.frank_wolfe, @@ -191,7 +191,7 @@ end end end -@testset "Testing block-coordinate FW with 
different update steps and linesearch strategies" begin +@testset "Testing stepsize/linesearch in block-coordinate FW" begin x, _, _, _, _ = FrankWolfe.alternating_linear_minimization( FrankWolfe.block_coordinate_frank_wolfe, @@ -256,9 +256,9 @@ end @test abs(x.blocks[2][1] - 1 / n) < 1e-6 end -@testset "Testing alternating projections for different LMO-pairs " begin +@testset "Testing alternating projections " begin - x, _, _, _, _ = FrankWolfe.alternating_projections((lmo1, lmo_prob), rand(n), verbose=true) + x, _, _, _, _ = FrankWolfe.alternating_projections((lmo1, lmo_prob), rand(n), verbose=false) @test abs(x.blocks[1][1]) < 1e-6 @test abs(x.blocks[2][1] - 1 / n) < 1e-6 diff --git a/test/as_quadratic_projection.jl b/test/as_quadratic_projection.jl index 2060d1997..66f76e488 100644 --- a/test/as_quadratic_projection.jl +++ b/test/as_quadratic_projection.jl @@ -11,7 +11,7 @@ k = 10000 # s = rand(1:100) s = 10 -@info "Seed $s" +# @info "Seed $s" Random.seed!(s) diff --git a/test/bcg_direction_error.jl b/test/bcg_direction_error.jl index 6c1ab0dba..b0fa28b89 100644 --- a/test/bcg_direction_error.jl +++ b/test/bcg_direction_error.jl @@ -33,7 +33,7 @@ x, v, primal, dual_gap, _, _ = FrankWolfe.blended_conditional_gradient( line_search=FrankWolfe.AdaptiveZerothOrder(L_est=2.0), print_iter=100, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, trajectory=false, lazy_tolerance=1.0, weight_purge_threshold=1e-9, @@ -55,7 +55,7 @@ x, v, primal_cut, dual_gap, _, _ = FrankWolfe.blended_conditional_gradient( line_search=FrankWolfe.AdaptiveZerothOrder(L_est=2.0), print_iter=k / 10, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, trajectory=false, lazy_tolerance=1.0, weight_purge_threshold=1e-10, diff --git a/test/benchmark.jl b/test/benchmark.jl index 38cc72253..0b6f13063 100644 --- a/test/benchmark.jl +++ b/test/benchmark.jl @@ -1,7 +1,6 @@ import FrankWolfe import LinearAlgebra - n = Int(1e6); xpi = rand(n); diff --git a/test/blocks.jl b/test/blocks.jl index 485716bde..ab2909ecb 100644 --- a/test/blocks.jl +++ b/test/blocks.jl @@ -4,7 +4,7 @@ using LinearAlgebra using FrankWolfe using SparseArrays -@testset "Block array behaviour" begin +@testset "Block array behavior " begin arr = FrankWolfe.BlockVector([ [ 1 3 @@ -53,7 +53,7 @@ using SparseArrays end -@testset "FrankWolfe array methods" begin +@testset "Frank-Wolfe array methods " begin @testset "Block arrays" begin mem = FrankWolfe.InplaceEmphasis() arr0 = FrankWolfe.BlockVector([ diff --git a/test/extra_storage.jl b/test/extra_storage.jl index 26b90055a..1465bceb6 100644 --- a/test/extra_storage.jl +++ b/test/extra_storage.jl @@ -11,7 +11,7 @@ function grad!(storage, x) return storage .= x .- center0 end -@testset "Blended Pairwise Conditional Gradient" begin +@testset "Blended Pairwise Conditional Gradient " begin lmo = FrankWolfe.UnitSimplexOracle(4.3) tlmo = FrankWolfe.TrackingLMO(lmo) @@ -59,7 +59,7 @@ end end end -@testset "Away-Frank-Wolfe" begin +@testset "Away-Frank-Wolfe " begin lmo = FrankWolfe.UnitSimplexOracle(4.3) tlmo = FrankWolfe.TrackingLMO(lmo) @@ -107,7 +107,7 @@ end end end -@testset "Blended Pairwise Conditional Gradient" begin +@testset "Blended Pairwise Conditional Gradient " begin lmo = FrankWolfe.UnitSimplexOracle(4.3) tlmo = FrankWolfe.TrackingLMO(lmo) diff --git a/test/function_gradient.jl b/test/function_gradient.jl index 4bfd1a734..01b10a7b0 100644 --- a/test/function_gradient.jl +++ b/test/function_gradient.jl @@ -7,7 +7,7 @@ import FrankWolfe: compute_gradient, 
compute_value, compute_value_gradient Random.seed!(123) -@testset "Simple and stochastic function gradient interface" begin +@testset "Simple and stochastic function gradient interface " begin for n in (1, 5, 20) A = rand(Float16, n, n) A .+= A' diff --git a/test/generic-arrays.jl b/test/generic-arrays.jl index eab5107a9..53e0a8a17 100644 --- a/test/generic-arrays.jl +++ b/test/generic-arrays.jl @@ -8,7 +8,7 @@ function Base.convert(::Type{GenericArray{Float64,1}}, v::FrankWolfe.ScaledHotVe return GenericArray(collect(v)) end -@testset "GenericArray is maintained" begin +@testset "GenericArray is maintained " begin n = Int(1e3) k = n @@ -29,7 +29,7 @@ end max_iteration=k, line_search=FrankWolfe.Agnostic(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) @@ -44,7 +44,7 @@ end max_iteration=k, line_search=FrankWolfe.Agnostic(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), VType=FrankWolfe.ScaledHotVector{Float64}, ) @@ -60,12 +60,12 @@ end max_iteration=k, line_search=FrankWolfe.Agnostic(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) end -@testset "fast_equal special types" begin +@testset "Testing fast_equal for special types " begin @testset "ScaledHotVector" begin a = FrankWolfe.ScaledHotVector(3.5, 3, 10) b = FrankWolfe.ScaledHotVector(3.5, 3, 11) diff --git a/test/lmo.jl b/test/lmo.jl index de36e591a..f690b9736 100644 --- a/test/lmo.jl +++ b/test/lmo.jl @@ -14,7 +14,7 @@ import Clp import Hypatia using JuMP -@testset "Simplex LMOs" begin +@testset "Simplex LMOs " begin n = 6 direction = zeros(6) rhs = 10 * rand() @@ -60,7 +60,7 @@ using JuMP end end -@testset "Hypersimplex" begin +@testset "Hypersimplex " begin @testset "Hypersimplex $n $K" for n in (2, 5, 10), K in (1, min(n, 4)) direction = randn(n) hypersimplex = FrankWolfe.HyperSimplexOracle(K, 3.0) @@ -80,7 +80,7 @@ end end end -@testset "Lp-norm epigraph LMO" begin +@testset "Lp-norm epigraph LMO " begin for n in (1, 2, 5, 10) τ = 5 + 3 * rand() # tests that the "special" p behaves like the "any" p, i.e. 
2.0 and 2 @@ -123,7 +123,7 @@ end end end -@testset "K-sparse polytope LMO" begin +@testset "K-sparse polytope LMO " begin @testset "$n-dimension" for n in (1, 2, 10) τ = 5 + 3 * rand() for K in 1:n @@ -153,7 +153,7 @@ end @test norm(v) > 0 end -@testset "Caching on simplex LMOs" begin +@testset "Caching on simplex LMOs " begin n = 6 direction = zeros(6) rhs = 10 * rand() @@ -203,7 +203,7 @@ function _is_doubly_stochastic(m) end end -@testset "Birkhoff polytope" begin +@testset "Birkhoff polytope " begin Random.seed!(42) lmo = FrankWolfe.BirkhoffPolytopeLMO() for n in (1, 2, 10) @@ -247,7 +247,7 @@ end ] end -@testset "Matrix completion and nuclear norm" begin +@testset "Matrix completion and nuclear norm " begin nfeat = 50 nobs = 100 r = 5 @@ -314,7 +314,7 @@ end ) end -@testset "Spectral norms" begin +@testset "Spectral norms " begin Random.seed!(42) o = Hypatia.Optimizer() MOI.set(o, MOI.Silent(), true) @@ -403,7 +403,7 @@ end end end -@testset "MOI oracle consistency" begin +@testset "MOI oracle consistency " begin Random.seed!(42) o = GLPK.Optimizer() MOI.set(o, MOI.Silent(), true) @@ -538,7 +538,7 @@ end end end -@testset "MOI oracle on Birkhoff polytope" begin +@testset "MOI oracle on Birkhoff polytope " begin o = GLPK.Optimizer() o_ref = GLPK.Optimizer() for n in (1, 2, 10) @@ -576,7 +576,7 @@ end end end -@testset "MOI oracle and KSparseLMO" begin +@testset "MOI oracle and KSparseLMO " begin o_base = GLPK.Optimizer() cached = MOI.Utilities.CachingOptimizer( MOI.Utilities.UniversalFallback(MOI.Utilities.Model{Float64}()), @@ -640,7 +640,7 @@ end end end -@testset "Product LMO" begin +@testset "Product LMO " begin lmo = FrankWolfe.ProductLMO(FrankWolfe.LpNormLMO{Inf}(3.0), FrankWolfe.LpNormLMO{1}(2.0)) dinf = randn(10) d1 = randn(5) @@ -659,7 +659,7 @@ end @test FrankWolfe.BlockVector([vinf, v1]) == v_block end -@testset "Scaled L-1 norm polytopes" begin +@testset "Scaled L-1 norm polytopes " begin lmo = FrankWolfe.ScaledBoundL1NormBall(-ones(10), ones(10)) # equivalent to LMO lmo_ref = FrankWolfe.LpNormLMO{1}(1) @@ -691,7 +691,7 @@ end @test v ≈ [-1, 0] end -@testset "Scaled L-inf norm polytopes" begin +@testset "Scaled L-inf norm polytopes " begin # tests ScaledBoundLInfNormBall for the standard hypercube, a shifted one, and a scaled one lmo = FrankWolfe.ScaledBoundLInfNormBall(-ones(10), ones(10)) lmo_ref = FrankWolfe.LpNormLMO{Inf}(1) @@ -736,7 +736,7 @@ end end end -@testset "Copy MathOpt LMO" begin +@testset "Copy MathOpt LMO " begin o_clp = MOI.Utilities.CachingOptimizer( MOI.Utilities.UniversalFallback(MOI.Utilities.Model{Float64}()), Clp.Optimizer(), @@ -758,7 +758,7 @@ end end end -@testset "MathOpt LMO with BlockVector" begin +@testset "MathOpt LMO with BlockVector " begin o = GLPK.Optimizer() MOI.set(o, MOI.Silent(), true) x = MOI.add_variables(o, 10) @@ -777,7 +777,7 @@ end end -@testset "Inplace LMO correctness" begin +@testset "Inplace LMO correctness " begin V = [-6.0, -6.15703, -5.55986] M = [3.0 2.8464949 2.4178848; 2.8464949 3.0 2.84649498; 2.4178848 2.84649498 3.0] @@ -801,7 +801,7 @@ end @test x_dense == x_standard end -@testset "Ellipsoid LMO $n" for n in (2, 5, 10) +@testset "Ellipsoid LMO $n " for n in (2, 5, 9) A = zeros(n, n) A[1, 1] = 3 @test_throws PosDefException FrankWolfe.EllipsoidLMO(A) @@ -833,7 +833,7 @@ end @test dot(xv, d) ≈ dot(v, d) atol = 1e-5 * n end -@testset "Convex hull" begin +@testset "Convex hull " begin lmo = FrankWolfe.ConvexHullOracle([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) for _ in 1:100 d = randn(3) @@ -846,7 +846,7 @@ end @test v == 
lmo.vertices[1] end -@testset "Symmetric LMO" begin +@testset "Symmetric LMO " begin # See examples/reynolds.jl struct BellCorrelationsLMO{T} <: FrankWolfe.LinearMinimizationOracle m::Int # number of inputs @@ -912,7 +912,7 @@ end end reynolds_adjoint(gradient, lmo) = gradient lmo = BellCorrelationsLMO{Float64}(size(p, 1), zeros(size(p, 1))) - sym = FrankWolfe.SymmetricLMO(lmo, reynolds_permutedims, reynolds_adjoint) + sym = FrankWolfe.SubspaceLMO(lmo, reynolds_permutedims, reynolds_adjoint) x0 = FrankWolfe.compute_extreme_point(sym, -p) active_set = FrankWolfe.ActiveSet([(1.0, x0)]) res = FrankWolfe.blended_pairwise_conditional_gradient( @@ -927,7 +927,7 @@ end @test length(res[6]) < 25 end -@testset "Ordered Weighted Norm LMO" begin +@testset "Ordered Weighted Norm LMO " begin Random.seed!(4321) N = Int(1e3) for _ in 1:10 diff --git a/test/momentum_memory.jl b/test/momentum_memory.jl index 45fa98001..5ec6aea50 100644 --- a/test/momentum_memory.jl +++ b/test/momentum_memory.jl @@ -8,7 +8,7 @@ n = Int(1e3) k = 10000 s = rand(1:100) -@info "Seed: $s" +# @info "Seed: $s" Random.seed!(s) @@ -47,7 +47,7 @@ xmem, _ = FrankWolfe.frank_wolfe( max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, trajectory=true, momentum=0.9, ) diff --git a/test/oddities.jl b/test/oddities.jl index 466a5019f..b64ecb901 100644 --- a/test/oddities.jl +++ b/test/oddities.jl @@ -2,7 +2,7 @@ import FrankWolfe using LinearAlgebra using Test -@testset "Testing adaptive LS when already optimal and gradient is 0" begin +@testset "Testing adaptive LS at optimality " begin f(x) = norm(x)^2 function grad!(storage, x) return storage .= 2x @@ -19,7 +19,7 @@ using Test x0, max_iteration=1000, line_search=FrankWolfe.Agnostic(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 @@ -32,7 +32,7 @@ using Test x0, max_iteration=1000, line_search=FrankWolfe.AdaptiveZerothOrder(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 @test abs( @@ -43,7 +43,7 @@ using Test x0, max_iteration=1000, line_search=FrankWolfe.Adaptive(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 @@ -58,7 +58,7 @@ using Test max_iteration=1000, lazy=true, line_search=FrankWolfe.AdaptiveZerothOrder(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 @test abs( @@ -70,7 +70,7 @@ using Test max_iteration=1000, lazy=true, line_search=FrankWolfe.Adaptive(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 @@ -83,7 +83,7 @@ using Test x0, max_iteration=1000, line_search=FrankWolfe.Adaptive(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 diff --git a/test/quadratic_lp_active_set.jl b/test/quadratic_lp_active_set.jl index a87b37cea..50bcdb6e6 100644 --- a/test/quadratic_lp_active_set.jl +++ b/test/quadratic_lp_active_set.jl @@ -9,7 +9,7 @@ n = Int(1e4) k = 5000 s = 10 -@info "Seed $s" +# @info "Seed $s" Random.seed!(s) xpi = rand(n); @@ -78,7 +78,7 @@ x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( as_quad_direct_generic, max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), - verbose=true, + verbose=false, callback=build_callback(trajectoryBPCG_quadratic_direct_generic), ); @@ -97,7 +97,7 @@ x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( as_quad_direct_basic_as, max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), - verbose=true, + verbose=false, callback=build_callback(trajectoryBPCG_quadratic_noqas), ); diff --git a/test/rational_test.jl b/test/rational_test.jl index e9725157a..415b33291 100644 --- 
a/test/rational_test.jl +++ b/test/rational_test.jl @@ -37,7 +37,7 @@ xmem, vmem, primal, dual_gap, trajectory = FrankWolfe.frank_wolfe( max_iteration=k, line_search=FrankWolfe.Agnostic(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) @@ -49,7 +49,7 @@ xstep, _ = FrankWolfe.frank_wolfe( max_iteration=k, line_search=FrankWolfe.Shortstep(2 // 1), print_iter=k / 10, - verbose=true, + verbose=false, ) @test eltype(xmem) == eltype(xstep) == Rational{BigInt} diff --git a/test/runtests.jl b/test/runtests.jl index 112582329..3a2bd298e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -10,7 +10,7 @@ include("utils.jl") include("active_set_variants.jl") include("alternating_methods_tests.jl") -@testset "Testing vanilla Frank-Wolfe with various step size and momentum strategies" begin +@testset "Testing vanilla Frank-Wolfe " begin f(x) = norm(x)^2 function grad!(storage, x) return storage .= 2x @@ -25,7 +25,7 @@ include("alternating_methods_tests.jl") x0, max_iteration=10000, line_search=FrankWolfe.GeneralizedAgnostic(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 g(x) = 2 + log(1 + log(x + 1)) @@ -37,7 +37,7 @@ include("alternating_methods_tests.jl") x0, max_iteration=10000, line_search=FrankWolfe.GeneralizedAgnostic(g), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -48,7 +48,7 @@ include("alternating_methods_tests.jl") x0, max_iteration=1000, line_search=FrankWolfe.Agnostic(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -71,7 +71,7 @@ include("alternating_methods_tests.jl") x0, max_iteration=1000, line_search=FrankWolfe.Goldenratio(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -155,12 +155,12 @@ include("alternating_methods_tests.jl") ) < 1.0e-3 end -@testset "Gradient with momentum correctly updated" begin +@testset "Gradient with momentum correctly updated " begin # fixing https://github.com/ZIB-IOL/FrankWolfe.jl/issues/47 include("momentum_memory.jl") end -@testset "Testing Lazified Conditional Gradients with various step size strategies" begin +@testset "Testing Lazified Conditional Gradients " begin f(x) = norm(x)^2 function grad!(storage, x) @. 
storage = 2x @@ -176,7 +176,7 @@ end x0, max_iteration=1000, line_search=FrankWolfe.Goldenratio(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -187,7 +187,7 @@ end x0, max_iteration=1000, line_search=FrankWolfe.Backtracking(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -198,7 +198,7 @@ end x0, max_iteration=1000, line_search=FrankWolfe.Shortstep(2.0), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -209,12 +209,12 @@ end x0, max_iteration=1000, line_search=FrankWolfe.AdaptiveZerothOrder(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 end -@testset "Testing Lazified Conditional Gradients with cache strategies" begin +@testset "Testing caching in Lazified Conditional Gradients " begin n = Int(1e5) L = 2 k = 1000 @@ -235,7 +235,7 @@ end x0, max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), - verbose=true, + verbose=false, ) @test primal - 1 / n <= bound @@ -268,7 +268,7 @@ end @test primal - 1 / n <= bound end -@testset "Testing memory_mode blas vs memory" begin +@testset "Testing memory_mode blas vs memory " begin n = Int(1e5) k = 100 xpi = rand(n) @@ -335,7 +335,7 @@ end @test primal < f(x0) end end -@testset "Testing rational variant" begin +@testset "Testing rational variant " begin rhs = 1 n = 40 k = 1000 @@ -377,7 +377,7 @@ end line_search=FrankWolfe.Agnostic(), print_iter=k / 10, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, ) @test eltype(x0) == eltype(x) == Rational{BigInt} @test f(x) <= 1e-4 @@ -393,7 +393,7 @@ end line_search=FrankWolfe.Shortstep(2 // 1), print_iter=k / 100, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, ) x0 = FrankWolfe.compute_extreme_point(lmo, direction) @@ -406,11 +406,11 @@ end line_search=FrankWolfe.Shortstep(2 // 1), print_iter=k / 10, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, ) @test eltype(x) == Rational{BigInt} end -@testset "Multi-precision tests" begin +@testset "Multi-precision tests " begin rhs = 1 n = 100 k = 1000 @@ -470,7 +470,7 @@ end line_search=FrankWolfe.AdaptiveZerothOrder(), print_iter=k / 10, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, ) @test eltype(x0) == T @@ -485,7 +485,7 @@ end line_search=FrankWolfe.AdaptiveZerothOrder(), print_iter=k / 10, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, ) @test eltype(x0) == T @@ -494,7 +494,7 @@ end end end -@testset "Stochastic FW linear regression" begin +@testset "Stochastic FW linear regression " begin function simple_reg_loss(θ, data_point) (xi, yi) = data_point (a, b) = (θ[1:end-1], θ[end]) @@ -581,7 +581,7 @@ end ) end -@testset "Away-step FW" begin +@testset "Away-step FW " begin n = 50 lmo_prob = FrankWolfe.ProbabilitySimplexOracle(1.0) x0 = FrankWolfe.compute_extreme_point(lmo_prob, rand(n)) @@ -612,7 +612,7 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.OutplaceEmphasis(), ) @@ -627,7 +627,7 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.OutplaceEmphasis(), ) @@ -643,7 +643,7 @@ end away_steps=false, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.OutplaceEmphasis(), ) @@ -658,7 +658,7 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, 
memory_mode=FrankWolfe.OutplaceEmphasis(), ) @@ -673,7 +673,7 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) @@ -689,7 +689,7 @@ end away_steps=false, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) @@ -704,7 +704,7 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) @@ -720,12 +720,12 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.OutplaceEmphasis(), ) end -@testset "Blended conditional gradient" begin +@testset "Testing Blended Conditional Gradient " begin n = 50 lmo_prob = FrankWolfe.ProbabilitySimplexOracle(1.0) x0 = FrankWolfe.compute_extreme_point(lmo_prob, randn(n)) @@ -743,7 +743,7 @@ end x0, max_iteration=k, line_search=FrankWolfe.Backtracking(), - verbose=true, + verbose=false, memory_mode=FrankWolfe.OutplaceEmphasis(), ) @@ -765,56 +765,55 @@ end end - include("oddities.jl") include("tracking.jl") # in separate module for name space issues module BCGDirectionError using Test -@testset "BCG direction accuracy" begin +@testset "BCG direction accuracy " begin include("bcg_direction_error.jl") end end module RationalTest using Test -@testset "Rational test and shortstep" begin +@testset "Rational test and shortstep " begin include("rational_test.jl") end end module BCGAccel using Test -@testset "BCG acceleration with different types" begin +@testset "BCG acceleration with different types " begin include("blended_accelerated.jl") end end module VertexStorageTest using Test -@testset "Vertex storage" begin +@testset "Vertex storage " begin include("extra_storage.jl") end end module LpDirectSolveTest using Test -@testset "LP solving for quadratic functions and active set" begin +@testset "LP solving for quadratic functions and active set " begin include("quadratic_lp_active_set.jl") end end module LpDirectSolveTestProjection using Test -@testset "LP solving for quadratic functions and active set" begin +@testset "LP solving for quadratic functions and active set " begin include("as_quadratic_projection.jl") end end module SparsifyingActiveSetTest using Test -@testset "Sparsifying active set" begin +@testset "Sparsifying active set " begin include("sparsifying_activeset.jl") end end @@ -823,7 +822,7 @@ include("generic-arrays.jl") include("blocks.jl") -@testset "End-to-end trajectory tests" begin +@testset "End-to-end trajectory tests " begin trajectory_testfiles = readdir(joinpath(@__DIR__, "trajectory_tests"), join=true) for file in trajectory_testfiles @eval Module() begin diff --git a/test/sparsifying_activeset.jl b/test/sparsifying_activeset.jl index 1fde16d53..1cf8d8299 100644 --- a/test/sparsifying_activeset.jl +++ b/test/sparsifying_activeset.jl @@ -21,7 +21,7 @@ k = 10000 # s = rand(1:100) s = 10 -@info "Seed $s" +# @info "Seed $s" Random.seed!(s) xpi = rand(n); diff --git a/test/timeout.jl b/test/timeout.jl index 7094ef5b3..9ba8aa47e 100644 --- a/test/timeout.jl +++ b/test/timeout.jl @@ -2,7 +2,7 @@ using FrankWolfe using LinearAlgebra using SparseArrays -@testset "Timing out" begin +@testset "Timing out " begin f(x) = norm(x)^2 function grad!(storage, x) return storage .= 2x @@ -17,7 +17,7 @@ using SparseArrays x0, max_iteration=1000, line_search=FrankWolfe.Agnostic(), - verbose=true, + 
verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -40,7 +40,7 @@ using SparseArrays x0, max_iteration=1000, line_search=FrankWolfe.Goldenratio(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( diff --git a/test/tracking.jl b/test/tracking.jl index ecd7d1823..dac8cb5f7 100644 --- a/test/tracking.jl +++ b/test/tracking.jl @@ -3,7 +3,7 @@ using LinearAlgebra using SparseArrays using FrankWolfe -@testset "Tracking Testset" begin +@testset "Tracking Testset " begin f(x) = norm(x)^2 function grad!(storage, x) return storage .= 2x @@ -41,7 +41,7 @@ using FrankWolfe end end -@testset "Testing vanilla Frank-Wolfe with various step size and momentum strategies" begin +@testset "Testing vanilla Frank-Wolfe " begin f(x) = norm(x)^2 function grad!(storage, x) @@ -68,7 +68,7 @@ end trajectory=true, callback=nothing, traj_data=storage, - verbose=true, + verbose=false, ) @test length(storage[1]) == 5 @@ -79,7 +79,7 @@ end @test tlmo.counter == niters + 1 # x0 computation and initialization end -@testset "Testing lazified Frank-Wolfe with various step size and momentum strategies" begin +@testset "Testing lazified Frank-Wolfe " begin f(x) = norm(x)^2 function grad!(storage, x) diff --git a/test/trajectory_tests/open_loop_parametric.jl b/test/trajectory_tests/open_loop_parametric.jl index 474d98ecb..38b0e5fce 100644 --- a/test/trajectory_tests/open_loop_parametric.jl +++ b/test/trajectory_tests/open_loop_parametric.jl @@ -26,7 +26,7 @@ using LinearAlgebra line_search=FrankWolfe.Agnostic(2), print_iter=k / 10, epsilon=1e-5, - verbose=true, + verbose=false, trajectory=true, ) @@ -39,7 +39,7 @@ using LinearAlgebra line_search=FrankWolfe.Agnostic(10), print_iter=k / 10, epsilon=1e-5, - verbose=true, + verbose=false, trajectory=true, ) @@ -68,7 +68,7 @@ using LinearAlgebra line_search=FrankWolfe.Agnostic(2), print_iter=k / 10, epsilon=1e-5, - verbose=true, + verbose=false, trajectory=true, ) @@ -81,7 +81,7 @@ using LinearAlgebra line_search=FrankWolfe.Agnostic(10), print_iter=k / 10, epsilon=1e-5, - verbose=true, + verbose=false, trajectory=true, ) diff --git a/test/utils.jl b/test/utils.jl index 66ed66e03..3fc795dd1 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -3,7 +3,7 @@ using LinearAlgebra using Test using SparseArrays -@testset "Simple benchmark_oracles function" begin +@testset "Simple benchmark_oracles function " begin n = Int(1e3) xpi = rand(n) @@ -21,7 +21,7 @@ using SparseArrays FrankWolfe.benchmark_oracles(f, grad!, () -> rand(n), lmo_prob; k=100) end -@testset "RankOneMatrix" begin +@testset "RankOneMatrix " begin for n in (1, 2, 5) for _ in 1:5 v = rand(n) @@ -64,7 +64,7 @@ end end end -@testset "RankOne muladd_memory_mode $n" for n in (1, 2, 5) +@testset "RankOne muladd_memory_mode $n " for n in (1, 2, 5) for _ in 1:5 n = 5 v = rand(n) @@ -82,7 +82,7 @@ end end end -@testset "Line Search methods" begin +@testset "Line Search methods " begin a = [-1.0, -1.0, -1.0] b = [1.0, 1.0, 1.0] function grad!(storage, x) @@ -214,14 +214,14 @@ end ) end -@testset "Momentum tests" begin +@testset "Momentum tests " begin it = FrankWolfe.ExpMomentumIterator() it.num = 0 # no momentum -> 1 @test FrankWolfe.momentum_iterate(it) == 1 end -@testset "Fast dot complex & norm" begin +@testset "Fast dot complex & norm " begin s = sparse(I, 3, 3) m = randn(Complex{Float64}, 3, 3) @test dot(s, m) ≈ FrankWolfe.fast_dot(s, m) From 9ab7500175ad46878eef125e0a307ddb9dc93597 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Designolle?= <55443987+sebastiendesignolle@users.noreply.github.com> 
Date: Sun, 3 Nov 2024 08:45:22 +0100 Subject: [PATCH 15/21] Set renorm in the away update to false (#522) --- src/pairwise.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pairwise.jl b/src/pairwise.jl index 5a4e7c7e9..56269f503 100644 --- a/src/pairwise.jl +++ b/src/pairwise.jl @@ -266,7 +266,7 @@ function pairwise_frank_wolfe( add_dropped_vertices=use_extra_vertex_storage, vertex_storage=extra_vertex_storage, ) - if add_dropped_vertices && gamma == gamma_max + if add_dropped_vertices && gamma ≈ gamma_max for vtx in active_set.atoms if vtx != v push!(extra_vertex_storage, vtx) From 3bcaffdfa6fa09fe8e248fdcf219dd1865319a3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Designolle?= <55443987+sebastiendesignolle@users.noreply.github.com> Date: Sun, 3 Nov 2024 18:13:33 +0100 Subject: [PATCH 16/21] Systematically cleanup in fw step (#525) --- src/pairwise.jl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/pairwise.jl b/src/pairwise.jl index 56269f503..1083a92ac 100644 --- a/src/pairwise.jl +++ b/src/pairwise.jl @@ -254,8 +254,6 @@ function pairwise_frank_wolfe( gamma = min(gamma_max, gamma) - # cleanup and renormalize every x iterations. Only for the fw steps. - renorm = mod(t, renorm_interval) == 0 # away update active_set_update!( active_set, @@ -274,7 +272,7 @@ function pairwise_frank_wolfe( end end # fw update - active_set_update!(active_set, gamma, fw_vertex, renorm, fw_index) + active_set_update!(active_set, gamma, fw_vertex, true, fw_index) end # println(active_set.weights) # println([atom[1] for atom in active_set.atoms]) From a8f2996c81991cf15b17961c4dcaaa963bd16f6d Mon Sep 17 00:00:00 2001 From: Hendrych Date: Tue, 5 Nov 2024 13:34:49 +0100 Subject: [PATCH 17/21] Test against DICG as well. --- examples/optimal_experiment_design.jl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/examples/optimal_experiment_design.jl b/examples/optimal_experiment_design.jl index 3ad42d6ca..f60bf50c7 100644 --- a/examples/optimal_experiment_design.jl +++ b/examples/optimal_experiment_design.jl @@ -226,7 +226,13 @@ m = 300 f, grad! = build_a_criterion(A, build_safe=false) x0, active_set = build_start_point(A) domain_oracle = build_domain_oracle(A) - x_s, _, primal, dual_gap, traj_data_s, _ = FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo, active_set, verbose=true, line_search=FrankWolfe.Secant(40, 1e-8, domain_oracle), trajectory=true) + x_s, _, primal, dual_gap, traj_data_s, _ = FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo, active_set, verbose=true, line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) + + lmo = FrankWolfe.ProbabilitySimplexOracle(1.0) + f, grad! = build_a_criterion(A, build_safe=false) + x0, active_set = build_start_point(A) + domain_oracle = build_domain_oracle(A) + x, _, primal, dual_gap, traj_data = decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) @test traj_data_s[end][1] < traj_data[end][1] @test isapprox(f(x_s), f(x)) @@ -244,7 +250,13 @@ m = 300 f, grad! 
= build_d_criterion(A, build_safe=false) x0, active_set = build_start_point(A) domain_oracle = build_domain_oracle(A) - x_s, _, primal, dual_gap, traj_data_s, _ = FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo, active_set, verbose=true, line_search=FrankWolfe.Secant(40, 1e-8, domain_oracle), trajectory=true) + x_s, _, primal, dual_gap, traj_data_s, _ = FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo, active_set, verbose=true, line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) + + lmo = FrankWolfe.ProbabilitySimplexOracle(1.0) + f, grad! = build_d_criterion(A, build_safe=false) + x0, active_set = build_start_point(A) + domain_oracle = build_domain_oracle(A) + x, _, primal, dual_gap, traj_data = decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) @test traj_data_s[end][1] < traj_data[end][1] @test isapprox(f(x_s), f(x)) From d5f1d65a098338a6a410331c95ad02f10ad775b5 Mon Sep 17 00:00:00 2001 From: Hendrych Date: Tue, 5 Nov 2024 15:41:16 +0100 Subject: [PATCH 18/21] Test DICG with Probability Simplex and MOI LMO. --- examples/optimal_experiment_design.jl | 56 ++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/examples/optimal_experiment_design.jl b/examples/optimal_experiment_design.jl index f60bf50c7..0f7129c77 100644 --- a/examples/optimal_experiment_design.jl +++ b/examples/optimal_experiment_design.jl @@ -4,6 +4,10 @@ using Random using Distributions using LinearAlgebra using Statistics +using SCIP +using MathOptInterface +const MOI = MathOptInterface +using SparseArrays using Test # The Optimal Experiment Design Problem consists of choosing a subset of experiments @@ -43,6 +47,29 @@ function build_data(m) return A end +""" +Build MOI version of the lmo. +""" +function build_moi_lmo(m) + o = SCIP.Optimizer() + MOI.empty!(o) + MOI.set(o, MOI.Silent(), true) + + x = MOI.add_variables(o, m) + + for xi in x + # each var has to be non-negative + MOI.add_constraint(o, xi, MOI.GreaterThan(0.0)) + end + + # sum of all variables has to be less than 1.0 + MOI.add_constraint(o, sum(x, init=0.0), MOI.LessThan(1.0)) + + lmo = FrankWolfe.MathOptLMO(o) + + return lmo +end + """ Check if given point is in the domain of f, i.e. X = transpose(A) * diagm(x) * A positive definite. @@ -87,12 +114,11 @@ function build_start_point(A) V = Vector{Float64}[] for i in S - v = zeros(m) - v[i] = 1.0 + v = FrankWolfe.ScaledHotVector(1.0, i, m) push!(V, v) end - x = sum(V .* 1/n) + x = SparseArrays.SparseVector(sum(V .* 1/n)) active_set= FrankWolfe.ActiveSet(fill(1/n, n), V, x) return x, active_set, S @@ -232,10 +258,20 @@ m = 300 f, grad! = build_a_criterion(A, build_safe=false) x0, active_set = build_start_point(A) domain_oracle = build_domain_oracle(A) - x, _, primal, dual_gap, traj_data = decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) + x_d, _, primal, dual_gap, traj_data_d = FrankWolfe.decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) + + lmo = build_moi_lmo(m) + f, grad! 
= build_a_criterion(A, build_safe=false) + x0, active_set = build_start_point(A) + domain_oracle = build_domain_oracle(A) + x_d_m, _, primal, dual_gap, traj_data_d_m = FrankWolfe.decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) @test traj_data_s[end][1] < traj_data[end][1] + @test traj_data_d[end][1] <= traj_data_s[end][1] + @test traj_data_d_m[end][1] <= traj_data_s[end][1] @test isapprox(f(x_s), f(x)) + @test isapprox(f(x_s), f(x_d)) + @test isapprox(f(x_s), f(x_d_m)) end @testset "D-Optimal Design" begin @@ -256,10 +292,20 @@ m = 300 f, grad! = build_d_criterion(A, build_safe=false) x0, active_set = build_start_point(A) domain_oracle = build_domain_oracle(A) - x, _, primal, dual_gap, traj_data = decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) + x_d, _, primal, dual_gap, traj_data_d = FrankWolfe.decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) + + lmo = build_moi_lmo(m) + f, grad! = build_d_criterion(A, build_safe=false) + x0, active_set = build_start_point(A) + domain_oracle = build_domain_oracle(A) + x_d_m, _, primal, dual_gap, traj_data_d_m = FrankWolfe.decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) @test traj_data_s[end][1] < traj_data[end][1] + @test traj_data_d[end][1] <= traj_data_s[end][1] + @test traj_data_d_m[end][1] <= traj_data_s[end][1] @test isapprox(f(x_s), f(x)) + @test isapprox(f(x_s), f(x_d)) + @test isapprox(f(x_s), f(x_d_m)) end end From ad596206a8be02efa58bc38a224c2ac0bd133e87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Wed, 6 Nov 2024 09:13:11 +0100 Subject: [PATCH 19/21] fix typo --- src/dicg.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dicg.jl b/src/dicg.jl index eacf4e707..9b7a749a9 100644 --- a/src/dicg.jl +++ b/src/dicg.jl @@ -124,7 +124,7 @@ function decomposition_invariant_conditional_gradient( println("GRADIENstep_typeYPE: $grad_type LAZY: $lazy lazy_tolerance: $lazy_tolerance") println("LMO: $(typeof(lmo))") if memory_mode isa InplaceEmphasis - @info("In memory_mode memory iterates are wristep_typeen back into x0!") + @info("In memory_mode memory iterates are written back into x0!") end end @@ -350,7 +350,7 @@ function blended_decomposition_invariant_conditional_gradient( println("GRADIENstep_typeYPE: $grad_type LAZY: $lazy lazy_tolerance: $lazy_tolerance") println("LMO: $(typeof(lmo))") if memory_mode isa InplaceEmphasis - @info("In memory_mode memory iterates are wristep_typeen back into x0!") + @info("In memory_mode memory iterates are written back into x0!") end end From be7d5687ae64436cf0d1d312e30b09b8cfc76875 Mon Sep 17 00:00:00 2001 From: Hendrych Date: Thu, 7 Nov 2024 11:42:57 +0100 Subject: [PATCH 20/21] Fix typo. 
--- src/blended_cg.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/blended_cg.jl b/src/blended_cg.jl index eee21f998..f239b5ca9 100644 --- a/src/blended_cg.jl +++ b/src/blended_cg.jl @@ -163,7 +163,7 @@ function blended_conditional_gradient( end if verbose - println("\nBlended Conditional Gradients Algorithm.") + println("\nBlended Conditional Gradient Algorithm.") NumType = eltype(x) println( "MEMORY_MODE: $memory_mode STEPSIZE: $line_search EPSILON: $epsilon MAXITERATION: $max_iteration TYPE: $NumType", From e5daf0a7ce4f773201880ea847eab6ab90f343aa Mon Sep 17 00:00:00 2001 From: Hendrych Date: Thu, 7 Nov 2024 11:43:32 +0100 Subject: [PATCH 21/21] Add BCG to the tests. --- examples/optimal_experiment_design.jl | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/examples/optimal_experiment_design.jl b/examples/optimal_experiment_design.jl index 0f7129c77..62b0ab5cf 100644 --- a/examples/optimal_experiment_design.jl +++ b/examples/optimal_experiment_design.jl @@ -241,6 +241,7 @@ end m = 300 @testset "Limit Optimal Design Problem" begin @testset "A-Optimal Design" begin + println("A_OPTIMAL DESIGN") A = build_data(m) f, grad! = build_a_criterion(A, build_safe=true) lmo = FrankWolfe.ProbabilitySimplexOracle(1.0) @@ -260,18 +261,18 @@ m = 300 domain_oracle = build_domain_oracle(A) x_d, _, primal, dual_gap, traj_data_d = FrankWolfe.decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) - lmo = build_moi_lmo(m) + lmo = FrankWolfe.ProbabilitySimplexOracle(1.0) f, grad! = build_a_criterion(A, build_safe=false) x0, active_set = build_start_point(A) domain_oracle = build_domain_oracle(A) - x_d_m, _, primal, dual_gap, traj_data_d_m = FrankWolfe.decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) + x_b, _, primal, dual_gap, traj_data_b, _ = FrankWolfe.blended_conditional_gradient(f, grad!, lmo, x0, verbose=true, trajectory=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle)) @test traj_data_s[end][1] < traj_data[end][1] @test traj_data_d[end][1] <= traj_data_s[end][1] - @test traj_data_d_m[end][1] <= traj_data_s[end][1] + @test traj_data_b[end][1] <= traj_data_s[end][1] @test isapprox(f(x_s), f(x)) @test isapprox(f(x_s), f(x_d)) - @test isapprox(f(x_s), f(x_d_m)) + @test isapprox(f(x_s), f(x_b)) end @testset "D-Optimal Design" begin @@ -294,18 +295,19 @@ m = 300 domain_oracle = build_domain_oracle(A) x_d, _, primal, dual_gap, traj_data_d = FrankWolfe.decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) - lmo = build_moi_lmo(m) + domain_oracle = build_domain_oracle(A) + lmo = FrankWolfe.ProbabilitySimplexOracle(1.0) f, grad! 
= build_d_criterion(A, build_safe=false) x0, active_set = build_start_point(A) domain_oracle = build_domain_oracle(A) - x_d_m, _, primal, dual_gap, traj_data_d_m = FrankWolfe.decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) + x_b, _, primal, dual_gap, traj_data_b, _ = FrankWolfe.blended_conditional_gradient(f, grad!, lmo, x0, verbose=true, trajectory=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle)) @test traj_data_s[end][1] < traj_data[end][1] @test traj_data_d[end][1] <= traj_data_s[end][1] - @test traj_data_d_m[end][1] <= traj_data_s[end][1] + @test traj_data_b[end][1] <= traj_data_s[end][1] @test isapprox(f(x_s), f(x)) @test isapprox(f(x_s), f(x_d)) - @test isapprox(f(x_s), f(x_d_m)) + @test isapprox(f(x_s), f(x_b)) end end
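
To make the renames and new options in the patches above concrete, two short Julia sketches follow. They are illustrations written against the post-0.4 API visible in these diffs, not part of any patch.

First, the `SymmetricLMO`/`SymmetricArray` to `SubspaceLMO`/`SubspaceVector` rename: a deflation maps full iterates into a smaller subspace where the algorithm runs, and an inflation maps directions back before calling the inner LMO. Only `FrankWolfe.SubspaceLMO`, `FrankWolfe.SubspaceVector`, and the `compute_extreme_point` dispatch come from the diffs; the toy `MatrixSignLMO` (an L∞-ball-style oracle over 2x2 matrices) and the `deflate`/`inflate` pair for symmetric 2x2 matrices are illustrative stand-ins for the `build_deflate_inflate_permutedims` helper in examples/symmetric.jl.

using FrankWolfe
using LinearAlgebra

# Toy inner LMO over full 2x2 matrices: minimizing <D, V> over the unit
# L-inf ball gives V = -sign.(D). Hypothetical, for illustration only.
struct MatrixSignLMO <: FrankWolfe.LinearMinimizationOracle end
function FrankWolfe.compute_extreme_point(::MatrixSignLMO, direction::AbstractMatrix; kwargs...)
    return -sign.(direction)
end

const sqrt2 = sqrt(2.0)

# Deflate a symmetric 2x2 matrix to R^3; the sqrt2 scaling on the
# off-diagonal keeps dot products in the deflated space equal to the
# Frobenius inner product on symmetric matrices, which is the "scalar
# products should be treated carefully" caveat in the SubspaceLMO docstring.
function deflate(M::AbstractMatrix, lmo)
    vec = [M[1, 1], M[2, 2], (M[1, 2] + M[2, 1]) / sqrt2]
    return FrankWolfe.SubspaceVector(copy(M), vec)
end

# Inflate back to the full matrix before calling the inner LMO.
function inflate(x::FrankWolfe.SubspaceVector, lmo)
    x.data[1, 1] = x.vec[1]
    x.data[2, 2] = x.vec[2]
    x.data[1, 2] = x.data[2, 1] = x.vec[3] / sqrt2
    return x.data
end

sym_lmo = FrankWolfe.SubspaceLMO(MatrixSignLMO(), deflate, inflate)
d = deflate([1.0 -2.0; -2.0 3.0], sym_lmo)      # a symmetric direction, deflated
v = FrankWolfe.compute_extreme_point(sym_lmo, d) # inflates d, calls the inner LMO, deflates the vertex

Second, the pattern that patches 17-21 exercise three times over in examples/optimal_experiment_design.jl: build an objective with a restricted domain, then run BPCG and DICG with the keyword form `FrankWolfe.Secant(domain_oracle=...)` that replaced the positional `Secant(40, 1e-8, domain_oracle)`. The matrix `A`, the D-criterion objective `f`, `grad!`, and `domain_oracle` below are simplified stand-ins for the `build_*` helpers in that file; the solver call signatures are the ones appearing in the diffs.

using FrankWolfe
using LinearAlgebra
using Random

Random.seed!(42)
m, n = 50, 10
A = randn(m, n)

# D-criterion: f(x) = -logdet(X(x)) is only defined where
# X(x) = A' * Diagonal(x) * A is positive definite, hence the domain
# oracle that the Secant line search uses to reject trial step sizes
# leaving the domain.
X(x) = Symmetric(A' * Diagonal(x) * A)
f(x) = -logdet(X(x))
function grad!(storage, x)
    Xinv = inv(X(x))
    @inbounds for i in 1:m
        # partial derivative w.r.t. x_i is -a_i' X(x)^{-1} a_i
        storage[i] = -dot(A[i, :], Xinv, A[i, :])
    end
    return storage
end
domain_oracle(x) = isposdef(X(x))

lmo = FrankWolfe.ProbabilitySimplexOracle(1.0)

# Start from a uniform mixture of n simplex vertices; the corresponding
# rows of A are (generically) linearly independent, so X(x0) is positive
# definite and x0 lies in the domain of f.
atoms = [FrankWolfe.ScaledHotVector(1.0, i, m) for i in 1:n]
x0 = zeros(m)
x0[1:n] .= 1 / n
active_set = FrankWolfe.ActiveSet(fill(1 / n, n), atoms, copy(x0))

# BPCG on the active set, Secant line search guarded by the domain oracle
x_s, _, _, _, traj_s, _ = FrankWolfe.blended_pairwise_conditional_gradient(
    f, grad!, lmo, active_set,
    line_search=FrankWolfe.Secant(domain_oracle=domain_oracle),
    trajectory=true,
)

# DICG from the same starting point and line search; it takes the iterate
# x0 directly instead of an active set
x_d, _, _, _, traj_d = FrankWolfe.decomposition_invariant_conditional_gradient(
    f, grad!, lmo, copy(x0),
    line_search=FrankWolfe.Secant(domain_oracle=domain_oracle),
    trajectory=true,
)

println("BPCG objective: ", f(x_s), "  DICG objective: ", f(x_d))

Both runs should reach approximately the same objective value, which is what the `isapprox` assertions in the patched test file check across BPCG, DICG, and BCG.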