From 8c8cc77c9ff911bcb2b1a7bdb3d36e91650d31e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Tue, 3 Sep 2024 14:36:29 +0200 Subject: [PATCH 01/21] added tullio --- Project.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 270baa783..2d7a962e3 100644 --- a/Project.toml +++ b/Project.toml @@ -53,7 +53,8 @@ Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Polyhedra = "67491407-f73d-577b-9b50-8179a7c68029" ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Tullio = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc" ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" [targets] -test = ["CSV", "Combinatorics", "DataFrames", "DoubleFloats", "DynamicPolynomials", "FiniteDifferences", "ForwardDiff", "GLPK", "JSON", "JuMP", "LaTeXStrings", "MAT", "MultivariatePolynomials", "Plots", "PlotThemes", "Polyhedra", "ReverseDiff", "ZipFile", "Test", "Clp", "Hypatia"] +test = ["CSV", "Combinatorics", "DataFrames", "DoubleFloats", "DynamicPolynomials", "FiniteDifferences", "ForwardDiff", "GLPK", "JSON", "JuMP", "LaTeXStrings", "MAT", "MultivariatePolynomials", "Plots", "PlotThemes", "Polyhedra", "ReverseDiff", "ZipFile", "Test", "Tullio", "Clp", "Hypatia"] From 6779b5a7d7b91e6eb8107c6b87141f6d6c969551 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Mon, 9 Sep 2024 11:31:41 +0200 Subject: [PATCH 02/21] Dicg tracking (#501) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add Combinatorics test (#486) * Secant Method based line search strategy for self-concordant functions (#491) * initial check-in // secant-based line search for self-concordant functions * added secant test and added reset_state to prevent side effects with the gradient and the linesearch workspaces / happened with golden-ratio with it * minor update to example * minor fixes to line search (clean-up) and example * MAT in test deps * remove reversediff * added data * more tests * added doc string * added a larger portfolio example * line search docstring attached * adapted workspace to avoid overwriting the gradient * formatting * remove benchmarktools --------- Co-authored-by: Mathieu Besançon * Update Project.toml (#492) * added tullio * added tullio (#493) * Use the best value for phi. * Secant fix (#495) * added tullio * change order for secant * domain oracle * BPCG - Real dual gap in Post Processing (#498) * Report the real dual gap in the post processing. * Add a note on the non-monotonicity of the dual gap. * Update README.md --------- Co-authored-by: Hendrych Co-authored-by: Mathieu Besançon * Update Project.toml (#499) * Optimal Design Problem Example (#494) * Optimal Design Problem example. * Wrap Secant linesearch in MonotonicGenericStep. * Set the correct Step type flag. * Use corrected Secant step size. Compare number of iterations. Secant should require fewer iterations. * Check only the iterations. 
--------- Co-authored-by: Hendrych * breaking changes for 0.4 (#484) * breaking change on step type (#483) * breaking change on step type * st to steptype_string * step type names * Update Project.toml * adapt log --------- Co-authored-by: dhendryc <92737336+dhendryc@users.noreply.github.com> * added secant workspace to track last gamma as start (#500) * dicg tracking * adapt to 0.4 --------- Co-authored-by: Sebastian Pokutta <23001135+pokutta@users.noreply.github.com> Co-authored-by: Hendrych Co-authored-by: Sébastien Designolle <55443987+sebastiendesignolle@users.noreply.github.com> Co-authored-by: dhendryc <92737336+dhendryc@users.noreply.github.com> --- src/FrankWolfe.jl | 2 +- src/dicg.jl | 43 ++++++++++++++++----------------- src/tracking.jl | 8 ++++++ test/decomposition_invariant.jl | 3 +++ 4 files changed, 33 insertions(+), 23 deletions(-) diff --git a/src/FrankWolfe.jl b/src/FrankWolfe.jl index 27636717c..e39503847 100644 --- a/src/FrankWolfe.jl +++ b/src/FrankWolfe.jl @@ -45,9 +45,9 @@ include("block_coordinate_algorithms.jl") include("alternating_methods.jl") include("blended_pairwise.jl") include("pairwise.jl") +include("dicg.jl") include("tracking.jl") include("callback.jl") -include("dicg.jl") # collecting most common data types etc and precompile # min version req set to 1.5 to prevent stalling of julia 1 diff --git a/src/dicg.jl b/src/dicg.jl index a26ddba77..eacf4e707 100644 --- a/src/dicg.jl +++ b/src/dicg.jl @@ -65,7 +65,7 @@ function decomposition_invariant_conditional_gradient( headers = ("Type", "Iteration", "Primal", "Dual", "Dual Gap", "Time", "It/sec") function format_state(state, args...) rep = ( - st[Symbol(state.tt)], + steptype_string[Symbol(state.step_type)], string(state.t), Float64(state.primal), Float64(state.primal - state.dual_gap), @@ -105,7 +105,7 @@ function decomposition_invariant_conditional_gradient( t = 0 primal = convert(float(eltype(x)), Inf) - tt = regular + step_type = ST_REGULAR time_start = time_ns() d = similar(x) @@ -121,10 +121,10 @@ function decomposition_invariant_conditional_gradient( "MEMORY_MODE: $memory_mode STEPSIZE: $line_search EPSILON: $epsilon MAXITERATION: $max_iteration TYPE: $NumType", ) grad_type = typeof(gradient) - println("GRADIENTTYPE: $grad_type LAZY: $lazy lazy_tolerance: $lazy_tolerance") + println("GRADIENTTYPE: $grad_type LAZY: $lazy lazy_tolerance: $lazy_tolerance") println("LMO: $(typeof(lmo))") if memory_mode isa InplaceEmphasis - @info("In memory_mode memory iterates are written back into x0!") + @info("In memory_mode memory iterates are written back into x0!") end end @@ -166,7 +166,7 @@ function decomposition_invariant_conditional_gradient( end if lazy - d, v, v_index, a, away_index, phi, tt = + d, v, v_index, a, away_index, phi, step_type = lazy_dicg_step( x, gradient, @@ -219,7 +219,7 @@ function decomposition_invariant_conditional_gradient( grad!, lmo, gradient, - tt, + step_type, ) if callback(state) === false break @@ -238,7 +238,7 @@ function decomposition_invariant_conditional_gradient( primal = f(x) dual_gap = fast_dot(x, gradient) - fast_dot(v, gradient) if verbose - tt = last + step_type = ST_LAST tot_time = (time_ns() - time_start) / 1e9 if callback !== nothing state = CallbackState( @@ -255,7 +255,7 @@ function decomposition_invariant_conditional_gradient( grad!, lmo, gradient, - tt, + step_type, ) callback(state) end @@ -300,7 +300,7 @@ function blended_decomposition_invariant_conditional_gradient( headers = ("Type", "Iteration", "Primal", "Dual", "Dual Gap", "Time", "It/sec") 
function format_state(state, args...) rep = ( - st[Symbol(state.tt)], + steptype_string[Symbol(state.step_type)], string(state.t), Float64(state.primal), Float64(state.primal - state.dual_gap), @@ -331,7 +331,7 @@ function blended_decomposition_invariant_conditional_gradient( t = 0 primal = convert(float(eltype(x)), Inf) - tt = regular + step_type = ST_REGULAR time_start = time_ns() d = similar(x) @@ -347,10 +347,10 @@ function blended_decomposition_invariant_conditional_gradient( "MEMORY_MODE: $memory_mode STEPSIZE: $line_search EPSILON: $epsilon MAXITERATION: $max_iteration TYPE: $NumType", ) grad_type = typeof(gradient) - println("GRADIENTTYPE: $grad_type LAZY: $lazy lazy_tolerance: $lazy_tolerance") + println("GRADIENTTYPE: $grad_type LAZY: $lazy lazy_tolerance: $lazy_tolerance") println("LMO: $(typeof(lmo))") if memory_mode isa InplaceEmphasis - @info("In memory_mode memory iterates are written back into x0!") + @info("In memory_mode memory iterates are written back into x0!") end end @@ -401,11 +401,11 @@ function blended_decomposition_invariant_conditional_gradient( phi = dual_gap # in-face step if inface_gap >= phi / lazy_tolerance - tt = pairwise + step_type = ST_PAIRWISE d = muladd_memory_mode(memory_mode, d, a, v) gamma_max = dicg_maximum_step(lmo, d, x) else # global FW step - tt = regular + step_type = ST_REGULAR d = muladd_memory_mode(memory_mode, d, x, v) gamma_max = one(phi) end @@ -437,7 +437,7 @@ function blended_decomposition_invariant_conditional_gradient( grad!, lmo, gradient, - tt, + step_type, ) if callback(state) === false break @@ -456,7 +456,7 @@ function blended_decomposition_invariant_conditional_gradient( primal = f(x) dual_gap = fast_dot(x, gradient) - fast_dot(v, gradient) if verbose - tt = last + step_type = ST_LAST tot_time = (time_ns() - time_start) / 1e9 if callback !== nothing state = CallbackState( @@ -473,7 +473,7 @@ function blended_decomposition_invariant_conditional_gradient( grad!, lmo, gradient, - tt, + step_type, ) callback(state) end @@ -496,8 +496,7 @@ function lazy_dicg_step( ) v_local, v_local_loc, val, a_local, a_local_loc, valM = pre_computed_set_argminmax(pre_computed_set, gradient) - tt = regular - gamma_max = nothing + step_type = ST_REGULAR away_index = nothing fw_index = nothing grad_dot_x = fast_dot(x, gradient) @@ -508,7 +507,7 @@ function lazy_dicg_step( if grad_dot_a_local - grad_dot_lazy_fw_vertex >= phi / lazy_tolerance && grad_dot_a_local - grad_dot_lazy_fw_vertex >= epsilon - tt = lazy + step_type = ST_LAZY v = v_local a = a_local d = muladd_memory_mode(memory_mode, d, a, v) @@ -519,7 +518,7 @@ function lazy_dicg_step( # Do lazy inface_point if grad_dot_a_local - grad_dot_v >= phi / lazy_tolerance && grad_dot_a_local - grad_dot_v >= epsilon - tt = lazy + step_type = ST_LAZY a = a_local away_index = a_local_loc else @@ -538,5 +537,5 @@ function lazy_dicg_step( phi = min(dual_gap, phi / 2.0) end end - return d, v, fw_index, a, away_index, phi, tt + return d, v, fw_index, a, away_index, phi, step_type end diff --git a/src/tracking.jl b/src/tracking.jl index 949806fbf..c8b4d7578 100644 --- a/src/tracking.jl +++ b/src/tracking.jl @@ -62,3 +62,11 @@ is_tracking_lmo(lmo) = false is_tracking_lmo(lmo::TrackingLMO) = true TrackingLMO(lmo) = TrackingLMO(lmo, 0) + +is_decomposition_invariant_oracle(lmo::TrackingLMO) = is_decomposition_invariant_oracle(lmo.lmo) + +function compute_inface_extreme_point(lmo::TrackingLMO, direction, x; lazy, kwargs...) 
+ return compute_inface_extreme_point(lmo.lmo, direction, x; lazy=lazy, kwargs...) +end + +dicg_maximum_step(lmo::TrackingLMO, direction, x) = dicg_maximum_step(lmo.lmo, direction, x) diff --git a/test/decomposition_invariant.jl b/test/decomposition_invariant.jl index 0d6e2a049..f6d852f03 100644 --- a/test/decomposition_invariant.jl +++ b/test/decomposition_invariant.jl @@ -247,5 +247,8 @@ end res_fw = FrankWolfe.frank_wolfe(f, grad!, lmo, FrankWolfe.compute_extreme_point(lmo, randn(n))) res_dicg = FrankWolfe.decomposition_invariant_conditional_gradient(f, grad!, lmo, FrankWolfe.compute_extreme_point(lmo, randn(n))) @test norm(res_fw.x - res_dicg.x) ≤ 1e-4 + lmo_tracking = FrankWolfe.TrackingLMO(lmo) + res_dicg_tracking = FrankWolfe.decomposition_invariant_conditional_gradient(f, grad!, lmo_tracking, FrankWolfe.compute_extreme_point(lmo, randn(n))) + @test norm(res_dicg_tracking.x - res_dicg.x) ≤ 1e-4 end end From 456139a6a5e81199ce0e764d39a0877c86b15e51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Thu, 24 Oct 2024 20:22:49 +0200 Subject: [PATCH 03/21] inverse to linsolve for AS quadratic (#510) --- src/active_set_quadratic.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index b200a8912..8f26b632e 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -36,7 +36,7 @@ function detect_quadratic_function(grad!, x0; test=true) X[:, i] .-= x0 G[:, i] .= storage .- g0 end - A = G * inv(X) + A = G / X b = g0 - A * x0 if test x_test = randn(T, n) From ffc360caf3027de2f334a407890943f664a89696 Mon Sep 17 00:00:00 2001 From: "Jannis H." <44263801+JannisHal@users.noreply.github.com> Date: Thu, 24 Oct 2024 23:19:50 +0200 Subject: [PATCH 04/21] Alternating projection update (#513) * Update and generalize AP * Reuse active set and dual_gap information * Example comparing vanilla FW, BPCG, and BPCG with active set reuse * Bug fix * dist2 - reduce allocations * Format document * Remove obsolete dependency --- examples/alternating_projections.jl | 37 +++++++ src/alternating_methods.jl | 163 ++++++++++++++++------------ test/alternating_methods_tests.jl | 35 +++--- 3 files changed, 149 insertions(+), 86 deletions(-) create mode 100644 examples/alternating_projections.jl diff --git a/examples/alternating_projections.jl b/examples/alternating_projections.jl new file mode 100644 index 000000000..159d204ff --- /dev/null +++ b/examples/alternating_projections.jl @@ -0,0 +1,37 @@ +import FrankWolfe +using LinearAlgebra +using Random + + +include("../examples/plot_utils.jl") + +Random.seed!(100) + +n = 500 +lmo = FrankWolfe.ConvexHullOracle([rand(Float64, (n,)) .+ 1.0 for _ in 1:n]) +lmo2 = FrankWolfe.ConvexHullOracle([rand(Float64, (n,)) .- 1.0 for _ in 1:n]) + + +trajectories = [] + +methods = [FrankWolfe.frank_wolfe, FrankWolfe.blended_pairwise_conditional_gradient, FrankWolfe.blended_pairwise_conditional_gradient] + +for (i,m) in enumerate(methods) + if i == 1 + x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m) + elseif i == 2 + x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m, reuse_active_set=false, lazy=true) + else + x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m, reuse_active_set=true, lazy=true) + end + 
push!(trajectories, traj_data) +end + + + +labels = ["FW", "BPCG", "BPCG (reuse)"] + +plot_trajectories(trajectories, labels, xscalelog=true) + + + diff --git a/src/alternating_methods.jl b/src/alternating_methods.jl index ec1c5b2f5..d07a73649 100644 --- a/src/alternating_methods.jl +++ b/src/alternating_methods.jl @@ -35,12 +35,12 @@ end Alternating Linear Minimization minimizes the objective `f` over the intersection of the feasible domains specified by `lmos`. The tuple `x0` defines the initial points for each domain. -Returns a tuple `(x, v, primal, dual_gap, infeas, traj_data)` with: +Returns a tuple `(x, v, primal, dual_gap, dist2, traj_data)` with: - `x` cartesian product of final iterates - `v` cartesian product of last vertices of the LMOs - `primal` primal value `f(x)` - `dual_gap` final Frank-Wolfe gap -- `infeas` sum of squared, pairwise distances between iterates +- `dist2` half the sum of squared pairwise distances between iterates - `traj_data` vector of trajectory information. """ function alternating_linear_minimization( @@ -49,17 +49,17 @@ function alternating_linear_minimization( grad!, lmos::NTuple{N,LinearMinimizationOracle}, x0::Tuple{Vararg{Any,N}}; - lambda::Union{Float64, Function}=1.0, + lambda::Union{Float64,Function}=1.0, verbose=false, trajectory=false, callback=nothing, max_iteration=10000, - print_iter = max_iteration / 10, + print_iter=max_iteration / 10, memory_mode=InplaceEmphasis(), line_search::LS=Adaptive(), epsilon=1e-7, kwargs..., -) where {N, LS<:Union{LineSearchMethod,NTuple{N,LineSearchMethod}}} +) where {N,LS<:Union{LineSearchMethod,NTuple{N,LineSearchMethod}}} x0_bc = BlockVector([x0[i] for i in 1:N], [size(x0[i]) for i in 1:N], sum(length, x0)) gradf = similar(x0_bc) @@ -74,21 +74,14 @@ function alternating_linear_minimization( for i in 1:N grad!(gradf.blocks[i], x.blocks[i]) end - t = [2.0 * (N * b - sum(x.blocks)) for b in x.blocks] + t = [N * b - sum(x.blocks) for b in x.blocks] return storage.blocks = λ[] * gradf.blocks + t end end - function dist2(x::BlockVector) - s = 0 - for i=1:N - for j=1:i-1 - diff = x.blocks[i] - x.blocks[j] - s += fast_dot(diff, diff) - end - end - return s - end + dist2(x::BlockVector) = + 0.5 * (N - 1) * sum(fast_dot(x.blocks[i], x.blocks[i]) for i in 1:N) - + sum(fast_dot(x.blocks[i], x.blocks[j]) for i in 1:N for j in 1:i-1) function build_objective() λ = Ref(λ0) @@ -122,8 +115,11 @@ function alternating_linear_minimization( num_type = eltype(x0[1]) grad_type = eltype(gradf.blocks[1]) - line_search_type = line_search isa Tuple ? [typeof(a) for a in line_search] : typeof(line_search) - println("MEMORY_MODE: $memory_mode STEPSIZE: $line_search_type EPSILON: $epsilon MAXITERATION: $max_iteration") + line_search_type = + line_search isa Tuple ? [typeof(a) for a in line_search] : typeof(line_search) + println( + "MEMORY_MODE: $memory_mode STEPSIZE: $line_search_type EPSILON: $epsilon MAXITERATION: $max_iteration", + ) println("TYPE: $num_type GRADIENTTYPE: $grad_type") println("LAMBDA: $lambda") @@ -177,7 +173,7 @@ function alternating_linear_minimization( end if lambda isa Function - callback = function (state,args...) + callback = function (state, args...) 
state.f.λ[] = lambda(state) state.grad!.λ[] = state.f.λ[] @@ -211,33 +207,15 @@ function alternating_linear_minimization( end -function ProjectionFW(y, lmo; max_iter=10000, eps=1e-3) - f(x) = sum(abs2, x - y) - grad!(storage, x) = storage .= 2 * (x - y) - - x0 = FrankWolfe.compute_extreme_point(lmo, y) - x_opt, _ = FrankWolfe.frank_wolfe( - f, - grad!, - lmo, - x0, - epsilon=eps, - max_iteration=max_iter, - trajectory=true, - line_search=FrankWolfe.Adaptive(verbose=false, relaxed_smoothness=false), - ) - return x_opt -end - """ alternating_projections(lmos::NTuple{N,LinearMinimizationOracle}, x0; ...) where {N} Computes a point in the intersection of feasible domains specified by `lmos`. -Returns a tuple `(x, v, dual_gap, infeas, traj_data)` with: +Returns a tuple `(x, v, dual_gap, dist2, traj_data)` with: - `x` cartesian product of final iterates - `v` cartesian product of last vertices of the LMOs - `dual_gap` final Frank-Wolfe gap -- `infeas` sum of squared, pairwise distances between iterates +- `dist2` half the sum of squared pairwise distances between iterates - `traj_data` vector of trajectory information. """ function alternating_projections( @@ -252,6 +230,10 @@ function alternating_projections( callback=nothing, traj_data=[], timeout=Inf, + proj_method=frank_wolfe, + inner_epsilon::Function=t -> 1 / (t^2 + 1), + reuse_active_set=false, + kwargs..., ) where {N} return alternating_projections( ProductLMO(lmos), @@ -265,6 +247,10 @@ function alternating_projections( callback, traj_data, timeout, + proj_method, + inner_epsilon, + reuse_active_set, + kwargs..., ) end @@ -281,17 +267,21 @@ function alternating_projections( callback=nothing, traj_data=[], timeout=Inf, + proj_method=frank_wolfe, + inner_epsilon::Function=t -> 1 / (t^2 + 1), + reuse_active_set=false, + kwargs..., ) where {N} # header and format string for output of the algorithm - headers = ["Type", "Iteration", "Dual Gap", "Infeas", "Time", "It/sec"] + headers = ["Type", "Iteration", "Dual Gap", "dist2", "Time", "It/sec"] format_string = "%6s %13s %14e %14e %14e %14e\n" - function format_state(state, infeas) + function format_state(state, primal) rep = ( steptype_string[Symbol(state.step_type)], string(state.t), Float64(state.dual_gap), - Float64(infeas), + Float64(primal), state.time, state.t / state.time, ) @@ -300,27 +290,61 @@ function alternating_projections( t = 0 dual_gap = Inf - x = fill(x0, N) - v = similar(x) + dual_gaps = fill(Inf, N) + x = BlockVector(compute_extreme_point.(lmo.lmos, fill(x0, N))) step_type = ST_REGULAR gradient = similar(x) - ndim = ndims(x) - infeasibility(x) = sum( - fast_dot( - selectdim(x, ndim, i) - selectdim(x, ndim, j), - selectdim(x, ndim, i) - selectdim(x, ndim, j), - ) for i in 1:N for j in 1:i-1 - ) + if reuse_active_set + if proj_method ∉ + [away_frank_wolfe, blended_pairwise_conditional_gradient, pairwise_frank_wolfe] + error("The selected FW method does not support active set reuse.") + end + active_sets = [ActiveSet([(1.0, x.blocks[i])]) for i in 1:N] + end - partial_infeasibility(x) = - sum(fast_dot(x[mod(i - 2, N)+1] - x[i], x[mod(i - 2, N)+1] - x[i]) for i in 1:N) + dist2(x::BlockVector) = + 0.5 * (N - 1) * sum(fast_dot(x.blocks[i], x.blocks[i]) for i in 1:N) - + sum(fast_dot(x.blocks[i], x.blocks[j]) for i in 1:N for j in 1:i-1) function grad!(storage, x) - @. 
storage = [2 * (x[i] - x[mod(i - 2, N)+1]) for i in 1:N] + return storage.blocks = [2.0 * (N * b - sum(x.blocks)) for b in x.blocks] end - projection_step(x, i, t) = ProjectionFW(x, lmo.lmos[i]; eps=1 / (t^2 + 1)) + function projection_step(i, t) + xii = x.blocks[mod(i - 2, N)+1] # iterate in previous block + f(y) = sum(abs2, y - xii) + function grad_proj!(storage, y) + return storage .= 2 * (y - xii) + end + + if reuse_active_set + + results = proj_method( + f, + grad_proj!, + lmo.lmos[i], + active_sets[i]; + epsilon=inner_epsilon(t), + max_iteration=10000, + line_search=Adaptive(), + kwargs..., + ) + active_sets[i] = results[:active_set] + else + results = proj_method( + f, + grad_proj!, + lmo.lmos[i], + x.blocks[i]; + epsilon=inner_epsilon(t), + max_iteration=10000, + line_search=Adaptive(), + kwargs..., + ) + end + return results[:x], results[:dual_gap] + end if trajectory @@ -372,19 +396,18 @@ function alternating_projections( # Projection step: for i in 1:N # project the previous iterate on the i-th feasible region - x[i] = projection_step(x[mod(i - 2, N)+1], i, t) + x.blocks[i], dual_gaps[i] = projection_step(i, t) end + dual_gap = sum(dual_gaps) + # Update gradients grad!(gradient, x) - # Update dual gaps - v = compute_extreme_point.(lmo.lmos, gradient) - dual_gap = fast_dot(x - v, gradient) # go easy on the memory - only compute if really needed if ((mod(t, print_iter) == 0 && verbose) || callback !== nothing) - infeas = infeasibility(x) + primal = dist2(x) end first_iter = false @@ -393,12 +416,12 @@ function alternating_projections( if callback !== nothing state = CallbackState( t, - infeas, - infeas - dual_gap, + primal, + primal - dual_gap, dual_gap, tot_time, x, - v, + nothing, nothing, nothing, nothing, @@ -408,7 +431,7 @@ function alternating_projections( step_type, ) # @show state - if callback(state, infeas) === false + if callback(state, primal) === false break end end @@ -419,9 +442,9 @@ function alternating_projections( # this is important as some variants do not recompute f(x) and the dual_gap regularly but only when reporting # hence the final computation. 
step_type = ST_LAST - infeas = infeasibility(x) + primal = dist2(x) grad!(gradient, x) - v = compute_extreme_point.(lmo.lmos, gradient) + v = compute_extreme_point(lmo, gradient) dual_gap = fast_dot(x - v, gradient) tot_time = (time_ns() - time_start) / 1.0e9 @@ -429,8 +452,8 @@ function alternating_projections( if callback !== nothing state = CallbackState( t, - infeas, - infeas - dual_gap, + primal, + primal - dual_gap, dual_gap, tot_time, x, @@ -443,9 +466,9 @@ function alternating_projections( gradient, step_type, ) - callback(state, infeas) + callback(state, primal) end - return x, v, dual_gap, infeas, traj_data + return x, v, dual_gap, primal, traj_data end diff --git a/test/alternating_methods_tests.jl b/test/alternating_methods_tests.jl index 904de49d8..21deecbe4 100644 --- a/test/alternating_methods_tests.jl +++ b/test/alternating_methods_tests.jl @@ -27,7 +27,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=1.0, + lambda=0.5, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -40,7 +40,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=1/3, + lambda=1/6, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -53,7 +53,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=1/9, + lambda=1/18, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -66,7 +66,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=3.0, + lambda=1.5, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -134,6 +134,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) ones(n), line_search=FrankWolfe.Agnostic(), momentum=0.9, + lambda=0.5 ) @test abs(x.blocks[1][1] - 0.5 / n) < 1e-3 @@ -160,6 +161,7 @@ end ones(n), line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), update_order=order, + lambda=0.5, ) @test abs(x.blocks[1][1] - 0.5 / n) < 1e-5 @test abs(x.blocks[2][1] - 1 / n) < 1e-5 @@ -181,6 +183,7 @@ end grad!, (lmo2, lmo_prob), ones(n), + lambda=0.5 ) @test abs(x.blocks[1][1] - 0.5 / n) < 1e-6 @@ -244,8 +247,8 @@ end grad!, (lmo_nb, lmo_prob), ones(n), - lambda=3.0, - line_search=(FrankWolfe.Shortstep(8.0), FrankWolfe.Adaptive()), # L-smooth in coordinates for L = 2+2*λ + lambda=1.5, + line_search=(FrankWolfe.Shortstep(4.0), FrankWolfe.Adaptive()), # L-smooth in coordinates for L = 1+2*λ update_step=(FrankWolfe.BPCGStep(), FrankWolfe.FrankWolfeStep()), ) @@ -257,31 +260,31 @@ end x, _, _, _, _ = FrankWolfe.alternating_projections((lmo1, lmo_prob), rand(n), verbose=true) - @test abs(x[1][1]) < 1e-6 - @test abs(x[2][1] - 1 / n) < 1e-6 + @test abs(x.blocks[1][1]) < 1e-6 + @test abs(x.blocks[2][1] - 1 / n) < 1e-6 x, _, _, _, _ = FrankWolfe.alternating_projections((lmo3, lmo_prob), rand(n)) - @test abs(x[1][1] - 1) < 1e-6 - @test abs(x[2][1] - 1 / n) < 1e-6 + @test abs(x.blocks[1][1] - 1) < 1e-6 + @test abs(x.blocks[2][1] - 1 / n) < 1e-6 x, _, _, infeas, _ = FrankWolfe.alternating_projections((lmo1, lmo2), rand(n)) - @test abs(x[1][1]) < 1e-6 - @test abs(x[2][1]) < 1e-6 + @test abs(x.blocks[1][1]) < 1e-6 + @test abs(x.blocks[2][1]) < 1e-6 @test infeas < 1e-6 x, _, _, infeas, _ = FrankWolfe.alternating_projections((lmo2, lmo3), rand(n)) - @test abs(x[1][1] - 1) < 1e-4 - @test abs(x[2][1] - 1) < 1e-4 + @test abs(x.blocks[1][1] - 1) < 1e-4 + @test abs(x.blocks[2][1] - 1) < 1e-4 @test infeas < 1e-6 x, _, _, 
infeas, traj_data = FrankWolfe.alternating_projections((lmo1, lmo3), rand(n), trajectory=true) - @test abs(x[1][1]) < 1e-6 - @test abs(x[2][1] - 1) < 1e-6 + @test abs(x.blocks[1][1]) < 1e-6 + @test abs(x.blocks[2][1] - 1) < 1e-6 @test traj_data != [] @test length(traj_data[1]) == 5 @test length(traj_data) >= 2 From 73585af8bdb92e84d1c5afc554f31da3168ec302 Mon Sep 17 00:00:00 2001 From: "Jannis H." <44263801+JannisHal@users.noreply.github.com> Date: Thu, 24 Oct 2024 23:20:20 +0200 Subject: [PATCH 05/21] Update Block-Coordinate FW (#512) * BPCG step fixes * Update Dual(Progress/Gap)Order * Clean up * Remove bug in test * Immutable DualGapOrder --- src/block_coordinate_algorithms.jl | 77 +++++++++++++++++++++--------- test/alternating_methods_tests.jl | 2 +- 2 files changed, 55 insertions(+), 24 deletions(-) diff --git a/src/block_coordinate_algorithms.jl b/src/block_coordinate_algorithms.jl index 4a1fc7e6a..0d034a7d3 100644 --- a/src/block_coordinate_algorithms.jl +++ b/src/block_coordinate_algorithms.jl @@ -41,20 +41,29 @@ end StochasticUpdate() = StochasticUpdate(-1) -mutable struct DualGapOrder <: BlockCoordinateUpdateOrder +""" +The dual gap order initiates one round of `limit` many updates. +The corresponding blocks are sampled with probabilities proportional to their respective dual gaps. +""" +struct DualGapOrder <: BlockCoordinateUpdateOrder limit::Int end DualGapOrder() = DualGapOrder(-1) +""" +The dual progress order initiates one round of `limit` many updates. +The corresponding blocks are sampled with probabilities proportional to their respective dual progress. +""" mutable struct DualProgressOrder <: BlockCoordinateUpdateOrder limit::Int previous_dual_gaps::Vector{Float64} dual_progress::Vector{Float64} + last_indices::Vector{Int} end -DualProgressOrder() = DualProgressOrder(-1, [], []) -DualProgressOrder(i::Int64) = DualProgressOrder(i, [], []) +DualProgressOrder() = DualProgressOrder(-1, [], [], []) +DualProgressOrder(i::Int64) = DualProgressOrder(i, [], [], []) """ The Lazy update order is discussed in "Flexible block-iterative @@ -122,6 +131,19 @@ function select_update_indices(u::StochasticUpdate, s::CallbackState, _) return [[rand(1:l)] for i in 1:u.limit] end +function sample_without_replacement(n::Int, weights::Vector{Float64}) + weights = weights ./ sum(weights) + cumsum_weights = cumsum(weights) + indices = zeros(Int, n) + for i in 1:n + indices[i] = findfirst(cumsum_weights .> rand()) + weights[indices[i]] = 0 + weights = weights ./ sum(weights) + cumsum_weights = cumsum(weights) + end + return indices +end + function select_update_indices(u::DualProgressOrder, s::CallbackState, dual_gaps) l = length(s.lmo.lmos) @assert u.limit <= l @assert u.limit > 0 || u.limit == -1 - # In the first iteration update every block so we get finite dual progress - if s.t < 2 - u.previous_dual_gaps = copy(dual_gaps) - return [[i] for i=1:l] - end - - if s.t == 1 - u.dual_progress = dual_gaps - u.previous_dual_gaps + # In the first two iterations update every block so we get finite dual progress + if s.t < 2 + u.dual_progress = ones(l) + indices = [[i for i=1:l]] else - diff = dual_gaps - u.previous_dual_gaps - u.dual_progress = [d == 0 ? u.dual_progress[i] : d for (i, d) in enumerate(diff)] + # Update dual progress only on updated blocks + for i in u.last_indices + d = u.previous_dual_gaps[i] - dual_gaps[i] + if d < 0 + u.dual_progress[i] = 0 + else + u.dual_progress[i] = d + end + end + n = u.limit == -1 ? l : u.limit + + # If fewer than n blocks have non-zero dual progress, update all of them + if sum(u.dual_progress .!= 0) < n + indices = [[i for i=1:l]] + else + indices = [sample_without_replacement(n, u.dual_progress)] + end end u.previous_dual_gaps = copy(dual_gaps) - - n = u.limit == -1 ? l : u.limit - return [[findfirst(cumsum(u.dual_progress/sum(u.dual_progress)) .> rand())] for _=1:n] + u.last_indices = vcat(indices...) + return indices end function select_update_indices(u::DualGapOrder, s::CallbackState, dual_gaps) @@ -156,11 +188,11 @@ function select_update_indices(u::DualGapOrder, s::CallbackState, dual_gaps) # In the first iteration update every block so we get finite dual gaps if s.t < 1 - return [[i] for i=1:l] + return [[i for i=1:l]] end n = u.limit == -1 ? l : u.limit - return [[findfirst(cumsum(dual_gaps/sum(dual_gaps)) .> rand())] for _=1:n] + return [sample_without_replacement(n, dual_gaps)] end function select_update_indices(update::LazyUpdate, s::CallbackState, dual_gaps) @@ -237,19 +269,18 @@ mutable struct BPCGStep <: UpdateStep phi::Float64 end -function Base.copy(::FrankWolfeStep) - return FrankWolfeStep() -end +Base.copy(::FrankWolfeStep) = FrankWolfeStep() function Base.copy(obj::BPCGStep) if obj.active_set === nothing - return BPCGStep(false, nothing, obj.renorm_interval, obj.lazy_tolerance, Inf) + return BPCGStep(obj.lazy, nothing, obj.renorm_interval, obj.lazy_tolerance, obj.phi) else return BPCGStep(obj.lazy, copy(obj.active_set), obj.renorm_interval, obj.lazy_tolerance, obj.phi) end end BPCGStep() = BPCGStep(false, nothing, 1000, 2.0, Inf) +BPCGStep(lazy::Bool) = BPCGStep(lazy, nothing, 1000, 2.0, Inf) function update_iterate( ::FrankWolfeStep, @@ -324,7 +355,7 @@ function update_iterate( # minor modification from original paper for improved sparsity # (proof follows with minor modification when estimating the step) - if local_gap > s.phi / s.lazy_tolerance + if local_gap > max(s.phi / s.lazy_tolerance, 0) # Robust condition to not drop the zero-vector if the dual_gap is negative by inaccuracy d = muladd_memory_mode(memory_mode, d, a, v_local) vertex_taken = v_local gamma_max = a_lambda diff --git a/test/alternating_methods_tests.jl b/test/alternating_methods_tests.jl index 21deecbe4..7bf9487a6 100644 --- a/test/alternating_methods_tests.jl +++ b/test/alternating_methods_tests.jl @@ -223,7 +223,7 @@ end grad!, (lmo1, lmo2), ones(n), - update_step=FrankWolfe.BPCGStep(true, nothing, 1000, false, Inf), + update_step=FrankWolfe.BPCGStep(true), ) @test abs(x.blocks[1][1]) < 1e-6 From 1ac1797d59ac9f5df5d882ff811be5f3f81bbf90 Mon Sep 17 00:00:00 2001 From: Sebastian Pokutta <23001135+pokutta@users.noreply.github.com> Date: Fri, 25 Oct 2024 11:12:54 +0200 Subject: [PATCH 06/21] BPCG with direct solve extension (#507) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * added direct solve feature BPCG via LP solver * adjusted for arbitrary LP solver * fixed deps * cleanup and comment * minor * added reporting of direct solve step * chose HiGHS as standard solver * added sparsification * added sparsification code * cleanup * minor cleanup * minor * added generalized direct_solve * clean up, docu, additional direct_solve * docstrings fixed? 
* sparsifier active set (#508) * sparsifier active set * fix typo * added sparsifying tests * generic tolerance * remove sparsification * format * HiGHS dep * Quadratic solve structure (#511) * sparsifier active set * start working on LP AS * first working quadratic * remove quadratic LP from current * cleanup * HiGHS in test deps * working reworked LP quadratic * working version generic quadratic * slow version generic quadratic * faster term manipulation * copy sufficient * remove comment * added test for quadratic * minor * simplify example * clean up code, verify error with ASQuad * Add update_weights! to fix direct solve with active_set_quadratic * remove direct solve from BPCG * rng changed --------- Co-authored-by: Sébastien Designolle * update example * format * clean up example * fix callback --------- Co-authored-by: Mathieu Besançon Co-authored-by: Sébastien Designolle --- Project.toml | 3 +- examples/blended_pairwise_with_direct.jl | 174 ++++++++ ...wise_with_direct_non-standard-quadratic.jl | 146 +++++++ examples/blended_pairwise_with_sparsify.jl | 100 +++++ examples/simple_stateless.jl | 2 +- src/FrankWolfe.jl | 2 + src/active_set.jl | 4 + src/active_set_quadratic.jl | 18 +- src/active_set_quadratic_direct_solve.jl | 383 ++++++++++++++++++ src/active_set_sparsifier.jl | 96 +++++ src/blended_pairwise.jl | 2 - src/moi_oracle.jl | 2 +- src/utils.jl | 71 ++++ test/quadratic_lp_active_set.jl | 113 ++++++ test/runtests.jl | 14 + test/sparsifying_activeset.jl | 85 ++++ 16 files changed, 1206 insertions(+), 9 deletions(-) create mode 100644 examples/blended_pairwise_with_direct.jl create mode 100644 examples/blended_pairwise_with_direct_non-standard-quadratic.jl create mode 100644 examples/blended_pairwise_with_sparsify.jl create mode 100644 src/active_set_quadratic_direct_solve.jl create mode 100644 src/active_set_sparsifier.jl create mode 100644 test/quadratic_lp_active_set.jl create mode 100644 test/sparsifying_activeset.jl diff --git a/Project.toml b/Project.toml index e56223008..ca298fa1a 100644 --- a/Project.toml +++ b/Project.toml @@ -43,6 +43,7 @@ DynamicPolynomials = "7c1d4256-1411-5781-91ec-d7bc3513ac07" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" GLPK = "60bf3e95-4087-53dc-ae20-288a0d20c6a6" +HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b" Hypatia = "b99e6be6-89ff-11e8-14f8-45c827f4f8f2" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" JuMP = "4076af6c-e467-56ae-b986-b466b2749572" @@ -58,4 +59,4 @@ Tullio = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc" ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" [targets] -test = ["CSV", "Combinatorics", "DataFrames", "Distributions", "DoubleFloats", "DynamicPolynomials", "FiniteDifferences", "ForwardDiff", "GLPK", "JSON", "JuMP", "LaTeXStrings", "MAT", "MultivariatePolynomials", "Plots", "PlotThemes", "Polyhedra", "ReverseDiff", "ZipFile", "Test", "Tullio", "Clp", "Hypatia"] +test = ["CSV", "Combinatorics", "DataFrames", "Distributions", "DoubleFloats", "DynamicPolynomials", "FiniteDifferences", "ForwardDiff", "GLPK", "HiGHS", "JSON", "JuMP", "LaTeXStrings", "MAT", "MultivariatePolynomials", "Plots", "PlotThemes", "Polyhedra", "ReverseDiff", "ZipFile", "Test", "Tullio", "Clp", "Hypatia"] diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl new file mode 100644 index 000000000..9a30bde76 --- /dev/null +++ b/examples/blended_pairwise_with_direct.jl @@ -0,0 +1,174 @@ +#= +This example demonstrates the use of the Blended Pairwise 
Conditional Gradient algorithm +with direct solve steps for a quadratic optimization problem over a sparse polytope. + +Note the special structure of f(x) = norm(x - x0)^2 that we assume here + +The example showcases how the algorithm balances between: +- Pairwise steps for efficient optimization +- Periodic direct solves for handling the quadratic objective +- Lazy (approximate) linear minimization steps for improved iteration complexity + +It also demonstrates how to set up custom callbacks for tracking algorithm progress. +=# + +using FrankWolfe +using LinearAlgebra +using Random + +import HiGHS +import MathOptInterface as MOI + +include("../examples/plot_utils.jl") + +n = Int(1e4) +k = 10_000 + +s = 10 +@info "Seed $s" +Random.seed!(s) + +xpi = rand(n); +total = sum(xpi); + +const xp = xpi ./ total; + +f(x) = norm(x - xp)^2 +function grad!(storage, x) + @. storage = 2 * (x - xp) +end + +lmo = FrankWolfe.KSparseLMO(5, 1.0) + +const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) + return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set))) + end +end + + +trajectoryBPCG_standard = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_standard), +); + +# Just projection quadratic +trajectoryBPCG_quadratic = [] +as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp) +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic), +); + +as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp) + +# with quadratic active set +trajectoryBPCG_quadratic_as = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_as), +); + +as_quad_direct = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + 2 * LinearAlgebra.I, + -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +# with LP acceleration +trajectoryBPCG_quadratic_direct = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_direct), +); + +as_quad_direct_generic = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + 2 * Diagonal(ones(length(xp))), + -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +# with LP acceleration +trajectoryBPCG_quadratic_direct_generic = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct_generic, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_direct_generic), +); + +as_quad_direct_basic_as = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([1.0], [copy(x00)], collect(x00)), + 2 * LinearAlgebra.I, 
+ -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +# with LP acceleration +trajectoryBPCG_quadratic_noqas = [] + +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct_basic_as, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_noqas), +); + + +# Update the data and labels for plotting +data_trajectories = [ + trajectoryBPCG_standard, + trajectoryBPCG_quadratic, + trajectoryBPCG_quadratic_as, + trajectoryBPCG_quadratic_direct, + trajectoryBPCG_quadratic_direct_generic, + trajectoryBPCG_quadratic_noqas, +] +labels_trajectories = [ + "BPCG (Standard)", + "BPCG (Specific Direct)", + "AS_Quad", + "Reloaded", + "Reloaded_generic", + "Reloaded_noqas", +] + +# Plot trajectories +plot_trajectories(data_trajectories, labels_trajectories, xscalelog=false) diff --git a/examples/blended_pairwise_with_direct_non-standard-quadratic.jl b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl new file mode 100644 index 000000000..ab934edd2 --- /dev/null +++ b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl @@ -0,0 +1,146 @@ +#= +This example demonstrates the use of the Blended Pairwise Conditional Gradient algorithm +with direct solve steps for a quadratic optimization problem over a sparse polytope which is not standard quadratic. + +The example showcases how the algorithm balances between: +- Pairwise steps for efficient optimization +- Periodic direct solves for handling the quadratic objective +- Lazy (approximate) linear minimization steps for improved iteration complexity + +It also demonstrates how to set up custom callbacks for tracking algorithm progress. +=# + +using FrankWolfe +using LinearAlgebra +using Random + +import HiGHS +import MathOptInterface as MOI + +include("../examples/plot_utils.jl") + +n = Int(1e2) +k = 10000 + +# s = rand(1:100) +s = 10 +@info "Seed $s" +Random.seed!(s) + +A = let + A = randn(n, n) + A' * A +end +@assert isposdef(A) == true + +const y = Random.rand(Bool, n) * 0.6 .+ 0.3 + +function f(x) + d = x - y + return dot(d, A, d) +end + +function grad!(storage, x) + mul!(storage, A, x) + return mul!(storage, A, y, -2, 2) +end + + +# lmo = FrankWolfe.KSparseLMO(5, 1000.0) + +## other LMOs to try +# lmo_big = FrankWolfe.KSparseLMO(100, big"1.0") +# lmo = FrankWolfe.LpNormLMO{Float64,5}(100.0) +# lmo = FrankWolfe.ProbabilitySimplexOracle(100.0); +lmo = FrankWolfe.UnitSimplexOracle(10000.0); + +x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) 
+ return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set))) + end +end + + +trajectoryBPCG_standard = [] +callback = build_callback(trajectoryBPCG_standard) + +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + line_search=FrankWolfe.Adaptive(), + print_iter=k / 10, + memory_mode=FrankWolfe.InplaceEmphasis(), + verbose=true, + trajectory=true, + callback=callback, +); + +active_set_quadratic_automatic = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=100, scaling_factor=1.2, max_interval=100), +) +trajectoryBPCG_quadratic_automatic = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic, + max_iteration=k, + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_automatic), +); + +active_set_quadratic_automatic2 = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=2), +) +trajectoryBPCG_quadratic_automatic2 = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic2, + max_iteration=k, + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_automatic2), +); + + +active_set_quadratic_automatic_standard = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([(1.0, copy(x00))]), + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=2), +) +trajectoryBPCG_quadratic_automatic_standard = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic_standard, + max_iteration=k, + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_automatic_standard), +); + + +dataSparsity = [ + trajectoryBPCG_standard, + trajectoryBPCG_quadratic_automatic, + trajectoryBPCG_quadratic_automatic_standard, +] +labelSparsity = ["BPCG (Standard)", "AS_Quad", "AS_Standard"] + + +# Plot trajectories +plot_trajectories(dataSparsity, labelSparsity, xscalelog=false) diff --git a/examples/blended_pairwise_with_sparsify.jl b/examples/blended_pairwise_with_sparsify.jl new file mode 100644 index 000000000..6d42b7de4 --- /dev/null +++ b/examples/blended_pairwise_with_sparsify.jl @@ -0,0 +1,100 @@ +#= +This example demonstrates the use of the Blended Pairwise Conditional Gradient algorithm +with periodic active-set sparsification for a quadratic optimization problem over a sparse polytope. + +The example showcases how the algorithm balances between: +- Pairwise steps for efficient optimization +- Periodic LP solves to sparsify the active set +- Lazy (approximate) linear minimization steps for improved iteration complexity + +It also demonstrates how to set up custom callbacks for tracking algorithm progress. 
+=# + +using FrankWolfe +using LinearAlgebra +using Random + +import HiGHS + +include("../examples/plot_utils.jl") + +n = Int(1e4) +k = 10000 + +s = 10 +@info "Seed $s" +Random.seed!(s) + +xpi = rand(n); +total = sum(xpi); + +# here the optimal solution lies in the interior; if you want an optimal solution on a face and not the interior, use: +# const xp = xpi; + +const xp = xpi ./ total; + +f(x) = norm(x - xp)^2 +function grad!(storage, x) + @. storage = 2 * (x - xp) +end + +# lmo = FrankWolfe.KSparseLMO(2, 1.0) + +## other LMOs to try +# lmo_big = FrankWolfe.KSparseLMO(100, big"1.0") +lmo = FrankWolfe.LpNormLMO{Float64,5}(1.0) +# lmo = FrankWolfe.ProbabilitySimplexOracle(1.0); +# lmo = FrankWolfe.UnitSimplexOracle(1.0); + +x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) + return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set))) + end +end + +FrankWolfe.benchmark_oracles(f, grad!, () -> randn(n), lmo; k=100) + +trajectoryBPCG_standard = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + print_iter=k / 10, + memory_mode=FrankWolfe.InplaceEmphasis(), + verbose=true, + trajectory=true, + callback=build_callback(trajectoryBPCG_standard), +); + +trajectoryBPCG_as_sparse = [] +active_set_sparse = FrankWolfe.ActiveSetSparsifier( + FrankWolfe.ActiveSet([1.0], [x00], similar(x00)), + HiGHS.Optimizer(), +) + +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_sparse, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + print_iter=k / 10, + memory_mode=FrankWolfe.InplaceEmphasis(), + verbose=true, + trajectory=true, + callback=build_callback(trajectoryBPCG_as_sparse), +); + +# Reduction primal/dual error vs. sparsity of solution + +dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_as_sparse] +labelSparsity = ["BPCG (Standard)", "BPCG (Sparsify)"] + +# Plot sparsity +plot_sparsity(dataSparsity, labelSparsity, legend_position=:topright) diff --git a/examples/simple_stateless.jl b/examples/simple_stateless.jl index 6a550a963..6bf230e26 100644 --- a/examples/simple_stateless.jl +++ b/examples/simple_stateless.jl @@ -71,7 +71,7 @@ plot_trajectories([trajectory_simple[1:end], trajectory_restart[1:end]], ["simpl # simple step iterations about 33% faster -@test line_search.factor <= 44 +@test line_search.factor <= 51 x, v, primal, dual_gap, trajectory_restart_highpres = FrankWolfe.frank_wolfe( f, diff --git a/src/FrankWolfe.jl b/src/FrankWolfe.jl index e6d29c3cc..cc2fef145 100644 --- a/src/FrankWolfe.jl +++ b/src/FrankWolfe.jl @@ -34,6 +34,8 @@ include("moi_oracle.jl") include("function_gradient.jl") include("active_set.jl") include("active_set_quadratic.jl") +include("active_set_quadratic_direct_solve.jl") +include("active_set_sparsifier.jl") include("blended_cg.jl") include("afw.jl") diff --git a/src/active_set.jl b/src/active_set.jl index 62393b47f..c02145666 100644 --- a/src/active_set.jl +++ b/src/active_set.jl @@ -326,3 +326,7 @@ function compute_active_set_iterate!(active_set::AbstractActiveSet{<:ScaledHotVe end return active_set.x end + +function update_weights!(as::AbstractActiveSet, new_weights) + as.weights .= new_weights +end diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index 8f26b632e..a616dd1fb 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -49,7 +49,8 @@ function detect_quadratic_function(grad!, x0; test=true) end function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function) where {AT,R} - return ActiveSetQuadratic(tuple_values, detect_quadratic_function(grad!, tuple_values[1][2])...) + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadratic(tuple_values, A, b) end function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b) where {AT,R,H} @@ -71,13 +72,15 @@ function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b) dots_b[idx] = fast_dot(b, atoms[idx]) end x = similar(b) - as = ActiveSetQuadratic{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified) + as = ActiveSetQuadratic(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified) + compute_active_set_iterate!(as) return as end -function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!) where {AT,R} - return ActiveSetQuadratic{AT,R}(tuple_values, detect_quadratic_function(grad!, tuple_values[1][2])...) 
+function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadratic{AT,R}(tuple_values, A, b) end function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b) where {AT,R,H} @@ -116,6 +119,7 @@ function Base.:*(a::Identity, b) return a.λ * b end end + function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b) where {AT,R} return ActiveSetQuadratic(tuple_values, Identity(A.λ), b) end @@ -147,6 +151,7 @@ function Base.push!(as::ActiveSetQuadratic{AT,R}, (λ, a)) where {AT,R} return as end +# TODO multi-indices version function Base.deleteat!(as::ActiveSetQuadratic, idx::Int) @inbounds for i in 1:idx-1 as.dots_x[i] -= as.weights_prev[idx] * as.dots_A[idx][i] @@ -311,3 +316,8 @@ function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R,IT,H}, b) return warm_as end +function update_weights!(as::ActiveSetQuadratic, new_weights) + as.weights_prev .= as.weights + as.weights .= new_weights + as.modified .= true +end diff --git a/src/active_set_quadratic_direct_solve.jl b/src/active_set_quadratic_direct_solve.jl new file mode 100644 index 000000000..fef53a077 --- /dev/null +++ b/src/active_set_quadratic_direct_solve.jl @@ -0,0 +1,383 @@ + +""" + ActiveSetQuadraticLinearSolve{AT, R, IT} + +Represents an active set of extreme vertices collected in a FW algorithm, +along with their coefficients `(λ_i, a_i)`. +`R` is the type of the `λ_i`, `AT` is the type of the atoms `a_i`. +The iterate `x = ∑λ_i a_i` is stored in x with type `IT`. +The objective function is assumed to be of the form `f(x)=½⟨x,Ax⟩+⟨b,x⟩+c` +so that the gradient is `∇f(x)=Ax+b`. + +This active set stores an inner `active_set` that keeps track of the current set of vertices and convex decomposition. +It therefore delegates all update, deletion, and addition operations to this inner active set. +The `weights`, `atoms`, and `x` fields should only be accessed to read and are effectively the same objects as those in the inner active set. + +The structure also contains a scheduler struct which is called with the `should_solve_lp` function. +To define a new frequency at which the LP should be solved, one can define another scheduler struct and implement the corresponding method. +""" +struct ActiveSetQuadraticLinearSolve{ + AT, + R<:Real, + IT, + H, + BT, + OT<:MOI.AbstractOptimizer, + AS<:AbstractActiveSet, + SF, +} <: AbstractActiveSet{AT,R,IT} + weights::Vector{R} + atoms::Vector{AT} + x::IT + A::H # Hessian matrix + b::BT # linear term + active_set::AS + lp_optimizer::OT + scheduler::SF + counter::Base.RefValue{Int} +end + +""" + ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, grad!::Function, lp_optimizer) + +Creates an ActiveSetQuadraticLinearSolve by computing the Hessian and linear term from `grad!`. +""" +function ActiveSetQuadraticLinearSolve( + tuple_values::Vector{Tuple{R,AT}}, + grad!::Function, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadraticLinearSolve(tuple_values, A, b, lp_optimizer, scheduler=scheduler) +end + +""" + ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, A, b, lp_optimizer) + +Creates an `ActiveSetQuadraticLinearSolve` from the given Hessian `A`, linear term `b` and `lp_optimizer` by creating an inner `ActiveSetQuadratic` active set. 
+""" +function ActiveSetQuadraticLinearSolve( + tuple_values::Vector{Tuple{R,AT}}, + A::H, + b, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R,H} + inner_as = ActiveSetQuadratic(tuple_values, A, b) + return ActiveSetQuadraticLinearSolve( + inner_as.weights, + inner_as.atoms, + inner_as.x, + inner_as.A, + inner_as.b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) +end + +function ActiveSetQuadraticLinearSolve( + inner_as::AbstractActiveSet, + A, + b, + lp_optimizer; + scheduler=LogScheduler(), +) + as = ActiveSetQuadraticLinearSolve( + inner_as.weights, + inner_as.atoms, + inner_as.x, + A, + b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) + compute_active_set_iterate!(as) + return as +end + +function ActiveSetQuadraticLinearSolve( + inner_as::AbstractActiveSet, + A::LinearAlgebra.UniformScaling, + b, + lp_optimizer; + scheduler=LogScheduler(), +) + as = ActiveSetQuadraticLinearSolve( + inner_as.weights, + inner_as.atoms, + inner_as.x, + A, + b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) + compute_active_set_iterate!(as) + return as +end + +function ActiveSetQuadraticLinearSolve( + inner_as::AbstractActiveSet, + grad!::Function, + lp_optimizer; + scheduler=LogScheduler(), +) + A, b = detect_quadratic_function(grad!, inner_as.atoms[1]) + return ActiveSetQuadraticLinearSolve(inner_as, A, b, lp_optimizer; scheduler=scheduler) +end + +function ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values::Vector{<:Tuple{<:Number,<:Any}}, + grad!::Function, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values, + A, + b, + lp_optimizer; + scheduler=scheduler, + ) +end + +function ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values::Vector{<:Tuple{<:Number,<:Any}}, + A::H, + b, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R,H} + inner_as = ActiveSetQuadratic{AT,R}(tuple_values, A, b) + as = ActiveSetQuadraticLinearSolve{AT,R,typeof(x),H}( + inner_as.weights, + inner_as.atoms, + inner_as.x, + A, + b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) + compute_active_set_iterate!(as) + return as +end + +function ActiveSetQuadraticLinearSolve( + tuple_values::Vector{Tuple{R,AT}}, + A::UniformScaling, + b, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R} + return ActiveSetQuadraticLinearSolve( + tuple_values, + Identity(A.λ), + b, + lp_optimizer; + scheduler=scheduler, + ) +end +function ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values::Vector{<:Tuple{<:Number,<:Any}}, + A::UniformScaling, + b, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R} + return ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values, + Identity(A.λ), + b, + lp_optimizer; + scheduler=scheduler, + ) +end + +# all mutating functions are delegated to the inner active set + +Base.push!(as::ActiveSetQuadraticLinearSolve, tuple) = push!(as.active_set, tuple) + +Base.deleteat!(as::ActiveSetQuadraticLinearSolve, idx) = deleteat!(as.active_set, idx) + +Base.empty!(as::ActiveSetQuadraticLinearSolve) = empty!(as.active_set) + +function active_set_update!( + as::ActiveSetQuadraticLinearSolve{AT,R}, + lambda, + atom, + renorm=true, + idx=nothing; + weight_purge_threshold=weight_purge_threshold_default(R), + add_dropped_vertices=false, + vertex_storage=nothing, +) where {AT,R} + if idx === nothing + idx = find_atom(as, atom) + end + active_set_update!( + as.active_set, + lambda, + atom, + renorm, + idx; + 
+        weight_purge_threshold=weight_purge_threshold,
+        add_dropped_vertices=add_dropped_vertices,
+        vertex_storage=vertex_storage,
+    )
+    # new atom introduced, we can solve the auxiliary LP
+    if idx < 0
+        as.counter[] += 1
+        if should_solve_lp(as, as.scheduler)
+            solve_quadratic_activeset_lp!(as)
+        end
+    end
+    return as
+end
+
+active_set_renormalize!(as::ActiveSetQuadraticLinearSolve) = active_set_renormalize!(as.active_set)
+
+function active_set_argmin(as::ActiveSetQuadraticLinearSolve, direction)
+    return active_set_argmin(as.active_set, direction)
+end
+
+function active_set_argminmax(as::ActiveSetQuadraticLinearSolve, direction; Φ=0.5)
+    return active_set_argminmax(as.active_set, direction; Φ=Φ)
+end
+
+# generic quadratic with quadratic information provided
+
+"""
+    solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, IT, H})
+
+Solves the auxiliary LP over the current active set.
+The method is specialized by type `H` of the Hessian matrix `A`.
+"""
+function solve_quadratic_activeset_lp!(
+    as::ActiveSetQuadraticLinearSolve{AT,R,IT,<:AbstractMatrix},
+) where {AT,R,IT}
+    nv = length(as)
+    o = as.lp_optimizer
+    MOI.empty!(o)
+    λ = MOI.add_variables(o, nv)
+    # λ ≥ 0, ∑ λ == 1
+    MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0))
+    sum_of_variables = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0)
+    MOI.add_constraint(o, sum_of_variables, MOI.EqualTo(1.0))
+    # Vᵗ A V λ == -Vᵗ b
+    for atom in as.atoms
+        lhs = MOI.ScalarAffineFunction{Float64}([], 0.0)
+        Base.sizehint!(lhs.terms, nv)
+        # replaces direct sum because of MOI and MutableArithmetic slow sums
+        for j in 1:nv
+            push!(lhs.terms, MOI.ScalarAffineTerm(fast_dot(atom, as.A, as.atoms[j]), λ[j]))
+        end
+        rhs = -dot(atom, as.b)
+        MOI.add_constraint(o, lhs, MOI.EqualTo(rhs))
+    end
+    MOI.set(o, MOI.ObjectiveFunction{typeof(sum_of_variables)}(), sum_of_variables)
+    MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE)
+    MOI.optimize!(o)
+    if MOI.get(o, MOI.TerminationStatus()) ∉ (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL)
+        return as
+    end
+    indices_to_remove = Int[]
+    new_weights = R[]
+    for idx in eachindex(λ)
+        weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx])
+        if weight_value <= 1e-5
+            push!(indices_to_remove, idx)
+        else
+            push!(new_weights, weight_value)
+        end
+    end
+    deleteat!(as.active_set, indices_to_remove)
+    @assert length(as) == length(new_weights)
+    update_weights!(as.active_set, new_weights)
+    active_set_cleanup!(as)
+    active_set_renormalize!(as)
+    compute_active_set_iterate!(as)
+    return as
+end
+
+# special case of scaled identity Hessian
+
+function solve_quadratic_activeset_lp!(
+    as::ActiveSetQuadraticLinearSolve{AT,R,IT,<:Union{Identity,LinearAlgebra.UniformScaling}},
+) where {AT,R,IT}
+    hessian_scaling = as.A.λ
+    nv = length(as)
+    o = as.lp_optimizer
+    MOI.empty!(o)
+    λ = MOI.add_variables(o, nv)
+    # λ ≥ 0, ∑ λ == 1
+    MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0))
+    sum_of_variables = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0)
+    MOI.add_constraint(o, sum_of_variables, MOI.EqualTo(1.0))
+    # a Aᵗ A λ == -Aᵗ b
+    for atom in as.atoms
+        lhs = MOI.ScalarAffineFunction{Float64}([], 0.0)
+        Base.sizehint!(lhs.terms, nv)
+        # replaces direct sum because of MOI and MutableArithmetic slow sums
+        for j in 1:nv
+            push!(lhs.terms, MOI.ScalarAffineTerm(hessian_scaling * dot(atom, as.atoms[j]), λ[j]))
+        end
+        rhs = -dot(atom, as.b)
+        MOI.add_constraint(o, lhs, MOI.EqualTo(rhs))
+    end
+    MOI.set(o, MOI.ObjectiveFunction{typeof(sum_of_variables)}(), sum_of_variables)
+    MOI.set(o,
MOI.ObjectiveSense(), MOI.MIN_SENSE) + MOI.optimize!(o) + if MOI.get(o, MOI.TerminationStatus()) ∉ (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) + return as + end + indices_to_remove = Int[] + new_weights = R[] + for idx in eachindex(λ) + weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) + if weight_value <= 2 * weight_purge_threshold_default(typeof(weight_value)) + push!(indices_to_remove, idx) + else + push!(new_weights, weight_value) + end + end + deleteat!(as.active_set, indices_to_remove) + @assert length(as) == length(new_weights) + update_weights!(as.active_set, new_weights) + active_set_cleanup!(as) + @assert all(>=(0), new_weights) + active_set_renormalize!(as) + compute_active_set_iterate!(as) + return as +end + +struct LogScheduler{T} + start_time::Int + scaling_factor::T + max_interval::Int + current_interval::Base.RefValue{Int} + last_solve_counter::Base.RefValue{Int} +end + +LogScheduler(; start_time=20, scaling_factor=1.5, max_interval=1000) = + LogScheduler(start_time, scaling_factor, max_interval, Ref(start_time), Ref(0)) + +function should_solve_lp(as::ActiveSetQuadraticLinearSolve, scheduler::LogScheduler) + if as.counter[] - scheduler.last_solve_counter[] >= scheduler.current_interval[] + scheduler.last_solve_counter[] = as.counter[] + scheduler.current_interval[] = min( + round(Int, scheduler.scaling_factor * scheduler.current_interval[]), + scheduler.max_interval, + ) + return true + end + return false +end diff --git a/src/active_set_sparsifier.jl b/src/active_set_sparsifier.jl new file mode 100644 index 000000000..6bb523217 --- /dev/null +++ b/src/active_set_sparsifier.jl @@ -0,0 +1,96 @@ +struct ActiveSetSparsifier{AT,R,IT,AS<:AbstractActiveSet{AT,R,IT},OT<:MOI.AbstractOptimizer} <: + AbstractActiveSet{AT,R,IT} + active_set::AS + weights::Vector{R} + atoms::Vector{AT} + x::IT + optimizer::OT + minimum_vertices::Int + solve_frequency::Int + counter::Base.RefValue{Int} +end + +function ActiveSetSparsifier( + active_set::AbstractActiveSet, + optimizer::MOI.AbstractOptimizer; + minimum_vertices=50, + solve_frequency=50, +) + return ActiveSetSparsifier( + active_set, + active_set.weights, + active_set.atoms, + active_set.x, + optimizer, + minimum_vertices, + solve_frequency, + Ref(0), + ) +end + +function Base.push!(as::ActiveSetSparsifier, (λ, a)) + return push!(as.active_set, (λ, a)) +end + +Base.deleteat!(as::ActiveSetSparsifier, idx::Int) = deleteat!(as.active_set, idx) + +Base.empty!(as::ActiveSetSparsifier) = empty!(as.active_set) + +function active_set_update!( + as::ActiveSetSparsifier{AS,OT,AT,R,IT}, + lambda, + atom, + renorm=true, + idx=nothing; + kwargs..., +) where {AS,OT,AT,R,IT} + active_set_update!(as.active_set, lambda, atom, renorm, idx; kwargs...) 
+ n = length(as) + as.counter[] += 1 + if n > as.minimum_vertices && mod(as.counter[], as.solve_frequency) == 0 + # sparsifying active set + MOI.empty!(as.optimizer) + x0 = as.active_set.x + λ = MOI.add_variables(as.optimizer, n) + # λ ∈ Δ_n ⇔ λ ≥ 0, ∑ λ == 1 + MOI.add_constraint.(as.optimizer, λ, MOI.GreaterThan(0.0)) + MOI.add_constraint(as.optimizer, sum(λ; init=0.0), MOI.EqualTo(1.0)) + x_sum = 0 * as.active_set.atoms[1] + for (idx, atom) in enumerate(as.active_set.atoms) + x_sum += λ[idx] * atom + end + for idx in eachindex(x_sum) + MOI.add_constraint(as.optimizer, x_sum[idx], MOI.EqualTo(x0[idx])) + end + # Set a dummy objective (minimize ∑λ) + dummy_objective = sum(λ; init=0.0) + MOI.set(as.optimizer, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective) + MOI.set(as.optimizer, MOI.ObjectiveSense(), MOI.MIN_SENSE) + MOI.optimize!(as.optimizer) + if MOI.get(as.optimizer, MOI.TerminationStatus()) in + (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) + indices_to_remove = Int[] + new_weights = R[] + for idx in eachindex(λ) + weight_value = MOI.get(as.optimizer, MOI.VariablePrimal(), λ[idx]) + if weight_value <= sqrt(weight_purge_threshold_default(typeof(weight_value))) + push!(indices_to_remove, idx) + else + push!(new_weights, weight_value) + end + end + deleteat!(as.active_set.atoms, indices_to_remove) + deleteat!(as.active_set.weights, indices_to_remove) + @assert length(as) == length(new_weights) + as.active_set.weights .= new_weights + active_set_renormalize!(as) + end + end + return as +end + +active_set_renormalize!(as::ActiveSetSparsifier) = active_set_renormalize!(as.active_set) + +active_set_argmin(as::ActiveSetSparsifier, direction) = active_set_argmin(as.active_set, direction) +active_set_argminmax(as::ActiveSetSparsifier, direction; Φ=0.5) = + active_set_argminmax(as.active_set, direction; Φ=Φ) diff --git a/src/blended_pairwise.jl b/src/blended_pairwise.jl index e2efec4d2..a42862063 100644 --- a/src/blended_pairwise.jl +++ b/src/blended_pairwise.jl @@ -1,4 +1,3 @@ - """ blended_pairwise_conditional_gradient(f, grad!, lmo, x0; kwargs...) @@ -325,7 +324,6 @@ function blended_pairwise_conditional_gradient( linesearch_workspace, memory_mode, ) - if callback !== nothing state = CallbackState( t, diff --git a/src/moi_oracle.jl b/src/moi_oracle.jl index 875b8001d..1471bd6e7 100644 --- a/src/moi_oracle.jl +++ b/src/moi_oracle.jl @@ -1,6 +1,6 @@ """ - MathOptLMO{OT <: MOI.Optimizer} <: LinearMinimizationOracle + MathOptLMO{OT <: MOI.AbstractOptimizer} <: LinearMinimizationOracle Linear minimization oracle with feasible space defined through a MathOptInterface.Optimizer. The oracle call sets the direction and reruns the optimizer. 
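
The `LogScheduler` defined in `active_set_quadratic_direct_solve.jl` above spaces out the LP solves geometrically: the first solve fires after `start_time` new-atom events, and each subsequent interval is multiplied by `scaling_factor` until it saturates at `max_interval`. The following standalone Julia sketch (an illustration only, not part of the patch) traces the counter values at which `should_solve_lp` would fire under the default settings:

# Sketch of the LogScheduler cadence, assuming the defaults shown above:
# start_time=20, scaling_factor=1.5, max_interval=1000.
interval = 20
last_solve = 0
solves_at = Int[]
for counter in 1:10_000   # counter increments once per new atom added
    if counter - last_solve >= interval
        push!(solves_at, counter)  # an LP solve would be triggered here
        last_solve = counter
        interval = min(round(Int, 1.5 * interval), 1000)
    end
end
first(solves_at, 5)  # [20, 50, 95, 163, 265]: solves become progressively rarer
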
diff --git a/src/utils.jl b/src/utils.jl
index 629f55e15..c6e01fd6f 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -163,6 +163,77 @@ function fast_dot(A::Matrix{T1}, B::SparseArrays.SparseMatrixCSC{T2}) where {T1,
     return s
 end
 
+fast_dot(a, Q, b) = dot(a, Q, b)
+
+function fast_dot(a::SparseArrays.AbstractSparseVector{<:Real}, Q::Diagonal{<:Real}, b::AbstractVector{<:Real})
+    if a === b
+        return _fast_quadratic_form_symmetric(a, Q)
+    end
+    d = Q.diag
+    nzvals = SparseArrays.nonzeros(a)
+    nzinds = SparseArrays.nonzeroinds(a)
+    return sum(eachindex(nzvals); init=zero(eltype(a))) do nzidx
+        nzvals[nzidx] * d[nzinds[nzidx]] * b[nzinds[nzidx]]
+    end
+end
+
+function fast_dot(a::SparseArrays.AbstractSparseVector{<:Real}, Q::Diagonal{<:Real}, b::SparseArrays.AbstractSparseVector{<:Real})
+    if a === b
+        return _fast_quadratic_form_symmetric(a, Q)
+    end
+    n = length(a)
+    if length(b) != n
+        throw(
+            DimensionMismatch("Vector a has length $n but b has length $(length(b))")
+        )
+    end
+    anzind = SparseArrays.nonzeroinds(a)
+    bnzind = SparseArrays.nonzeroinds(b)
+    anzval = SparseArrays.nonzeros(a)
+    bnzval = SparseArrays.nonzeros(b)
+    s = zero(Base.promote_eltype(a, Q, b))
+
+    if isempty(anzind) || isempty(bnzind)
+        return s
+    end
+
+    a_idx = 1
+    b_idx = 1
+    a_idx_last = length(anzind)
+    b_idx_last = length(bnzind)
+
+    # go through the nonzero indices of a and b simultaneously
+    @inbounds while a_idx <= a_idx_last && b_idx <= b_idx_last
+        ia = anzind[a_idx]
+        ib = bnzind[b_idx]
+        if ia == ib
+            s += dot(anzval[a_idx], Q.diag[ia], bnzval[b_idx])
+            a_idx += 1
+            b_idx += 1
+        elseif ia < ib
+            a_idx += 1
+        else
+            b_idx += 1
+        end
+    end
+    return s
+end
+
+
+function _fast_quadratic_form_symmetric(a, Q)
+    d = Q.diag
+    if length(d) != length(a)
+        throw(DimensionMismatch())
+    end
+    nzvals = SparseArrays.nonzeros(a)
+    nzinds = SparseArrays.nonzeroinds(a)
+    s = zero(Base.promote_eltype(a, Q))
+    @inbounds for nzidx in eachindex(nzvals)
+        s += nzvals[nzidx]^2 * d[nzinds[nzidx]]
+    end
+    return s
+end
+
 """
     trajectory_callback(storage)
 
diff --git a/test/quadratic_lp_active_set.jl b/test/quadratic_lp_active_set.jl
new file mode 100644
index 000000000..a87b37cea
--- /dev/null
+++ b/test/quadratic_lp_active_set.jl
@@ -0,0 +1,113 @@
+using FrankWolfe
+using LinearAlgebra
+using Random
+using Test
+import HiGHS
+import MathOptInterface as MOI
+
+n = Int(1e4)
+k = 5000
+
+s = 10
+@info "Seed $s"
+Random.seed!(s)
+
+xpi = rand(n);
+total = sum(xpi);
+
+const xp = xpi ./ total;
+
+f(x) = norm(x - xp)^2
+function grad!(storage, x)
+    @. storage = 2 * (x - xp)
+end
+
+lmo = FrankWolfe.KSparseLMO(5, 1.0)
+
+const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n))
+
+function build_callback(trajectory_arr)
+    return function callback(state, active_set, args...)
+        return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set)))
+    end
+end
+
+trajectoryBPCG_standard = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    copy(x00),
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=false,
+    callback=build_callback(trajectoryBPCG_standard),
+);
+
+as_quad_direct = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    [(1.0, copy(x00))],
+    2 * LinearAlgebra.I,
+    -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
+
+trajectoryBPCG_quadratic_direct_specialized = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad_direct,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=false,
+    callback=build_callback(trajectoryBPCG_quadratic_direct_specialized),
+);
+
+as_quad_direct_generic = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    [(1.0, copy(x00))],
+    2 * Diagonal(ones(length(xp))),
+    -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
+
+trajectoryBPCG_quadratic_direct_generic = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad_direct_generic,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=true,
+    callback=build_callback(trajectoryBPCG_quadratic_direct_generic),
+);
+
+as_quad_direct_basic_as = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    FrankWolfe.ActiveSet([1.0], [copy(x00)], collect(x00)),
+    2 * LinearAlgebra.I,
+    -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
+
+trajectoryBPCG_quadratic_noqas = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad_direct_basic_as,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=true,
+    callback=build_callback(trajectoryBPCG_quadratic_noqas),
+);
+
+
+dual_gaps_quadratic_specialized = getindex.(trajectoryBPCG_quadratic_direct_specialized, 4)
+dual_gaps_quadratic_generic = getindex.(trajectoryBPCG_quadratic_direct_generic, 4)
+dual_gaps_quadratic_noqas = getindex.(trajectoryBPCG_quadratic_noqas, 4)
+dual_gaps_bpcg = getindex.(trajectoryBPCG_standard, 4)
+
+
+@test dual_gaps_quadratic_specialized[end] < dual_gaps_bpcg[end]
+@test dual_gaps_quadratic_noqas[end] < dual_gaps_bpcg[end]
+@test norm(dual_gaps_quadratic_specialized - dual_gaps_quadratic_noqas) ≤ k * 1e-5
diff --git a/test/runtests.jl b/test/runtests.jl
index d984043dc..40f5ac366 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -798,6 +798,20 @@ using Test
     end
 end
 
+module LpDirectSolveTest
+using Test
+@testset "LP solving for quadratic functions and active set" begin
+    include("quadratic_lp_active_set.jl")
+end
+end
+
+module SparsifyingActiveSetTest
+using Test
+@testset "Sparsifying active set" begin
+    include("sparsifying_activeset.jl")
+end
+end
+
 include("generic-arrays.jl")
 
 include("blocks.jl")
 
diff --git a/test/sparsifying_activeset.jl b/test/sparsifying_activeset.jl
new file mode 100644
index 000000000..1fde16d53
--- /dev/null
+++ b/test/sparsifying_activeset.jl
@@ -0,0 +1,85 @@
+#=
+This example demonstrates the use of the Blended Pairwise Conditional Gradient algorithm
+with direct solve steps for a quadratic optimization problem over a sparse polytope.
+
+The example showcases how the algorithm balances between:
+- Pairwise steps for efficient optimization
+- Periodic direct solves for handling the quadratic objective
+- Lazy (approximate) linear minimization steps for improved iteration complexity
+
+It also demonstrates how to set up custom callbacks for tracking algorithm progress.
+=#
+
+using LinearAlgebra
+using Test
+using Random
+using FrankWolfe
+import HiGHS
+
+n = Int(1e4)
+k = 10000
+
+# s = rand(1:100)
+s = 10
+@info "Seed $s"
+Random.seed!(s)
+
+xpi = rand(n);
+total = sum(xpi);
+
+# here the optimal solution lies in the interior; if you want an optimal solution on a face and not in the interior, use:
+# const xp = xpi;
+
+const xp = xpi ./ total;
+
+f(x) = norm(x - xp)^2
+function grad!(storage, x)
+    @. storage = 2 * (x - xp)
+end
+
+lmo = FrankWolfe.LpNormLMO{Float64,5}(1.0)
+
+const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n))
+
+function build_callback(trajectory_arr)
+    return function callback(state, active_set, args...)
+        return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set)))
+    end
+end
+
+trajectoryBPCG_standard = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    copy(x00),
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=false,
+    trajectory=false,
+    callback=build_callback(trajectoryBPCG_standard),
+);
+
+active_set_sparse = FrankWolfe.ActiveSetSparsifier(
+    FrankWolfe.ActiveSet([1.0], [x00], similar(x00)),
+    HiGHS.Optimizer(),
+)
+trajectoryBPCG_as_sparse = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    active_set_sparse,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=false,
+    callback=build_callback(trajectoryBPCG_as_sparse),
+);
+
+as_cardinality_bpcg = getindex.(trajectoryBPCG_standard, 6)
+as_cardinality_sparse = getindex.(trajectoryBPCG_as_sparse, 6)
+@test maximum(as_cardinality_sparse - as_cardinality_bpcg) <= 0
+
+dual_gap_bpcg = getindex.(trajectoryBPCG_standard, 4)
+dual_gap_sparse = getindex.(trajectoryBPCG_as_sparse, 4)
+@test maximum(dual_gap_sparse - dual_gap_bpcg) <= 1e-7

From 6bcb4b707a9c80b389ac924698660e0d383b34be Mon Sep 17 00:00:00 2001
From: dhendryc <92737336+dhendryc@users.noreply.github.com>
Date: Mon, 28 Oct 2024 10:58:42 +0100
Subject: [PATCH 07/21] Actually use Backtracking in Secant (#517)

* Show lowest dual gap in last iteration.

* Add backtracking as a fall back linesearch in Secant.

* Let the user decide the fallback line search.

* Adjust syntax.

* Type stability.

* Actually use backtracking.
--------- Co-authored-by: Hendrych --- src/linesearch.jl | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/linesearch.jl b/src/linesearch.jl index 5ae2d3fe1..f974b9c38 100644 --- a/src/linesearch.jl +++ b/src/linesearch.jl @@ -425,7 +425,7 @@ function perform_line_search( while abs(dot_gdir) > line_search.tol if i > line_search.limit_num_steps workspace.last_gamma = best_gamma # Update last_gamma before returning - return best_gamma + break end grad!(grad_storage, storage) @@ -433,7 +433,7 @@ function perform_line_search( if dot_gdir_new ≈ dot_gdir workspace.last_gamma = best_gamma # Update last_gamma before returning - return best_gamma + break end gamma_new = gamma - dot_gdir_new * (gamma - gamma_prev) / (dot_gdir_new - dot_gdir) @@ -452,6 +452,12 @@ function perform_line_search( i += 1 end if abs(dot_gdir) > line_search.tol + # Choose gamma_max to be domain feasible + storage = muladd_memory_mode(memory_mode, storage, x, gamma_max, d) + while !line_search.domain_oracle(storage) + gamma_max /= 2 + storage = muladd_memory_mode(memory_mode, storage, x, gamma_max, d) + end gamma = perform_line_search( line_search.inner_ls, 0, @@ -468,8 +474,9 @@ function perform_line_search( storage = muladd_memory_mode(memory_mode, storage, x, gamma, d) new_val = f(storage) - @assert new_val <= best_val - best_gamma = gamma + if new_val <= best_val + best_gamma = gamma + end end workspace.last_gamma = best_gamma # Update last_gamma before returning return best_gamma From ccf1367a8325848a785ed2a2157188ec39b97fb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Mon, 28 Oct 2024 11:46:13 +0100 Subject: [PATCH 08/21] fix sparsify types (#518) * fix sparsify types * remove log --- examples/blended_pairwise_with_sparsify.jl | 14 ++++---------- src/active_set_sparsifier.jl | 4 ++-- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/examples/blended_pairwise_with_sparsify.jl b/examples/blended_pairwise_with_sparsify.jl index 6d42b7de4..7700268bb 100644 --- a/examples/blended_pairwise_with_sparsify.jl +++ b/examples/blended_pairwise_with_sparsify.jl @@ -64,8 +64,6 @@ trajectoryBPCG_standard = [] copy(x00), max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), - print_iter=k / 10, - memory_mode=FrankWolfe.InplaceEmphasis(), verbose=true, trajectory=true, callback=build_callback(trajectoryBPCG_standard), @@ -76,25 +74,21 @@ active_set_sparse = FrankWolfe.ActiveSetSparsifier( FrankWolfe.ActiveSet([1.0], [x00], similar(x00)), HiGHS.Optimizer(), ) - @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( f, grad!, lmo, - copy(x00), + active_set_sparse, max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), - print_iter=k / 10, - memory_mode=FrankWolfe.InplaceEmphasis(), verbose=true, - trajectory=true, callback=build_callback(trajectoryBPCG_as_sparse), ); # Reduction primal/dual error vs. 
sparsity of solution -dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic] -labelSparsity = ["BPCG (Standard)", "BPCG (Sparsify)"] +plot_data = [trajectoryBPCG_standard, trajectoryBPCG_as_sparse] +plot_labels = ["BPCG (Standard)", "BPCG (Sparsify)"] # Plot sparsity -plot_sparsity(dataSparsity, labelSparsity, legend_position=:topright) +plot_sparsity(plot_data, plot_labels, legend_position=:topright) diff --git a/src/active_set_sparsifier.jl b/src/active_set_sparsifier.jl index 6bb523217..6a408a0ac 100644 --- a/src/active_set_sparsifier.jl +++ b/src/active_set_sparsifier.jl @@ -37,13 +37,13 @@ Base.deleteat!(as::ActiveSetSparsifier, idx::Int) = deleteat!(as.active_set, idx Base.empty!(as::ActiveSetSparsifier) = empty!(as.active_set) function active_set_update!( - as::ActiveSetSparsifier{AS,OT,AT,R,IT}, + as::ActiveSetSparsifier{AT,R,IT,AS}, lambda, atom, renorm=true, idx=nothing; kwargs..., -) where {AS,OT,AT,R,IT} +) where {AS,AT,R,IT} active_set_update!(as.active_set, lambda, atom, renorm, idx; kwargs...) n = length(as) as.counter[] += 1 From 87d8ec1a1249721a28095e085bb2d9baf101815e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Designolle?= <55443987+sebastiendesignolle@users.noreply.github.com> Date: Mon, 28 Oct 2024 12:39:27 +0100 Subject: [PATCH 09/21] Revert inverse syntax (#520) --- src/active_set_quadratic.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index a616dd1fb..5739198f2 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -36,7 +36,7 @@ function detect_quadratic_function(grad!, x0; test=true) X[:, i] .-= x0 G[:, i] .= storage .- g0 end - A = G \ X + A = G * inv(X) b = g0 - A * x0 if test x_test = randn(T, n) From 8e094714d1a5309da506f7c2fafa11a723732366 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Mon, 28 Oct 2024 13:34:39 +0100 Subject: [PATCH 10/21] Update Project.toml (#514) * Update Project.toml * update docs --- Project.toml | 2 +- docs/src/reference/3_backend.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index ca298fa1a..0949ccd84 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "FrankWolfe" uuid = "f55ce6ea-fdc5-4628-88c5-0087fe54bd30" authors = ["ZIB-IOL"] -version = "0.4.1" +version = "0.4.2" [deps] Arpack = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97" diff --git a/docs/src/reference/3_backend.md b/docs/src/reference/3_backend.md index 1856373e0..34e71f040 100644 --- a/docs/src/reference/3_backend.md +++ b/docs/src/reference/3_backend.md @@ -4,7 +4,7 @@ ```@autodocs Modules = [FrankWolfe] -Pages = ["active_set.jl"] +Pages = ["active_set.jl", "active_set_quadratic.jl", "active_set_quadratic_direct_solve.jl", "active_set_sparsifier.jl"] ``` ## Functions and gradients From 57520a75a3ca2262428c28c69cfc53f0bf15e2e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Designolle?= Date: Tue, 29 Oct 2024 15:20:11 +0100 Subject: [PATCH 11/21] Factorise constructor syntax (typed vs not typed) --- src/active_set.jl | 14 ++------------ src/active_set_quadratic.jl | 23 +---------------------- 2 files changed, 3 insertions(+), 34 deletions(-) diff --git a/src/active_set.jl b/src/active_set.jl index c02145666..f02177933 100644 --- a/src/active_set.jl +++ b/src/active_set.jl @@ -27,17 +27,7 @@ ActiveSet{AT,R}() where {AT,R} = ActiveSet{AT,R,Vector{float(eltype(AT))}}([], [ ActiveSet{AT}() where {AT} = ActiveSet{AT,Float64,Vector{float(eltype(AT))}}() 
function ActiveSet(tuple_values::AbstractVector{Tuple{R,AT}}) where {AT,R} - n = length(tuple_values) - weights = Vector{R}(undef, n) - atoms = Vector{AT}(undef, n) - @inbounds for idx in 1:n - weights[idx] = tuple_values[idx][1] - atoms[idx] = tuple_values[idx][2] - end - x = similar(atoms[1], float(eltype(atoms[1]))) - as = ActiveSet{AT,R,typeof(x)}(weights, atoms, x) - compute_active_set_iterate!(as) - return as + return ActiveSet{AT,R}(tuple_values) end function ActiveSet{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}) where {AT,R} @@ -48,7 +38,7 @@ function ActiveSet{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}) weights[idx] = tuple_values[idx][1] atoms[idx] = tuple_values[idx][2] end - x = similar(tuple_values[1][2], float(eltype(tuple_values[1][2]))) + x = similar(atoms[1], float(eltype(atoms[1]))) as = ActiveSet{AT,R,typeof(x)}(weights, atoms, x) compute_active_set_iterate!(as) return as diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index 5739198f2..adea1c10b 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -54,28 +54,7 @@ function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Fu end function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b) where {AT,R,H} - n = length(tuple_values) - weights = Vector{R}(undef, n) - atoms = Vector{AT}(undef, n) - dots_x = zeros(R, n) - dots_A = Vector{Vector{R}}(undef, n) - dots_b = Vector{R}(undef, n) - weights_prev = zeros(R, n) - modified = trues(n) - @inbounds for idx in 1:n - weights[idx] = tuple_values[idx][1] - atoms[idx] = tuple_values[idx][2] - dots_A[idx] = Vector{R}(undef, idx) - for idy in 1:idx - dots_A[idx][idy] = fast_dot(A * atoms[idx], atoms[idy]) - end - dots_b[idx] = fast_dot(b, atoms[idx]) - end - x = similar(b) - as = ActiveSetQuadratic(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified) - - compute_active_set_iterate!(as) - return as + return ActiveSetQuadratic{AT,R}(tuple_values, A, b) end function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function) where {AT,R} From d71ec0b6d67291bf4ffca319c473fef4313410ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Designolle?= Date: Tue, 29 Oct 2024 15:32:50 +0100 Subject: [PATCH 12/21] Isolate reset_quadratic_dots! 
outside the constructor --- src/active_set_quadratic.jl | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index adea1c10b..08f8e4e96 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -74,18 +74,27 @@ function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number, @inbounds for idx in 1:n weights[idx] = tuple_values[idx][1] atoms[idx] = tuple_values[idx][2] - dots_A[idx] = Vector{R}(undef, idx) - for idy in 1:idx - dots_A[idx][idy] = fast_dot(A * atoms[idx], atoms[idy]) - end - dots_b[idx] = fast_dot(b, atoms[idx]) end x = similar(b) as = ActiveSetQuadratic{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified) + reset_quadratic_dots!(as) compute_active_set_iterate!(as) return as end +# should only be called upon construction +# for active sets with a large number of atoms, this function becomes very costly +function reset_quadratic_dots!(as::ActiveSetQuadratic{AT,R}) where {AT,R} + @inbounds for idx in 1:length(as) + as.dots_A[idx] = Vector{R}(undef, idx) + for idy in 1:idx + as.dots_A[idx][idy] = fast_dot(as.A * as.atoms[idx], as.atoms[idy]) + end + as.dots_b[idx] = fast_dot(as.b, as.atoms[idx]) + end + return as +end + # custom dummy structure to handle identity hessian matrix # required as LinearAlgebra.I does not work for general tensors struct Identity{R <: Real} From f9cdd9f248af465a83ac7f877318534f2880acf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Tue, 29 Oct 2024 17:29:32 +0100 Subject: [PATCH 13/21] tolerance for non-diagonal and test (#521) * tolerance for non-diagonal and test * separate namespaces * factorize active set direct solve by Hessian * fix type not necessarily matrix * fix type --- src/active_set_quadratic_direct_solve.jl | 64 +++---------- test/as_quadratic_projection.jl | 116 +++++++++++++++++++++++ test/runtests.jl | 7 ++ 3 files changed, 135 insertions(+), 52 deletions(-) create mode 100644 test/as_quadratic_projection.jl diff --git a/src/active_set_quadratic_direct_solve.jl b/src/active_set_quadratic_direct_solve.jl index fef53a077..c90ab0a8e 100644 --- a/src/active_set_quadratic_direct_solve.jl +++ b/src/active_set_quadratic_direct_solve.jl @@ -262,8 +262,8 @@ Solves the auxiliary LP over the current active set. The method is specialized by type `H` of the Hessian matrix `A`. 
""" function solve_quadratic_activeset_lp!( - as::ActiveSetQuadraticLinearSolve{AT,R,IT,<:AbstractMatrix}, -) where {AT,R,IT} + as::ActiveSetQuadraticLinearSolve{AT,R,IT,H}, +) where {AT,R,IT,H} nv = length(as) o = as.lp_optimizer MOI.empty!(o) @@ -278,7 +278,10 @@ function solve_quadratic_activeset_lp!( Base.sizehint!(lhs.terms, nv) # replaces direct sum because of MOI and MutableArithmetic slow sums for j in 1:nv - push!(lhs.terms, MOI.ScalarAffineTerm(fast_dot(atom, as.A, as.atoms[j]), λ[j])) + push!( + lhs.terms, + _compute_quadratic_constraint_term(atom, as.A, as.atoms[j], λ[j]), + ) end rhs = -dot(atom, as.b) MOI.add_constraint(o, lhs, MOI.EqualTo(rhs)) @@ -293,7 +296,7 @@ function solve_quadratic_activeset_lp!( new_weights = R[] for idx in eachindex(λ) weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) - if weight_value <= 1e-5 + if weight_value <= 2 * weight_purge_threshold_default(typeof(weight_value)) push!(indices_to_remove, idx) else push!(new_weights, weight_value) @@ -308,55 +311,12 @@ function solve_quadratic_activeset_lp!( return as end -# special case of scaled identity Hessian +function _compute_quadratic_constraint_term(atom1, A::AbstractMatrix, atom2, λ) + return MOI.ScalarAffineTerm(fast_dot(atom1, A, atom2), λ) +end -function solve_quadratic_activeset_lp!( - as::ActiveSetQuadraticLinearSolve{AT,R,IT,<:Union{Identity,LinearAlgebra.UniformScaling}}, -) where {AT,R,IT} - hessian_scaling = as.A.λ - nv = length(as) - o = as.lp_optimizer - MOI.empty!(o) - λ = MOI.add_variables(o, nv) - # λ ≥ 0, ∑ λ == 1 - MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0)) - sum_of_variables = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0) - MOI.add_constraint(o, sum_of_variables, MOI.EqualTo(1.0)) - # a Aᵗ A λ == -Aᵗ b - for atom in as.atoms - lhs = MOI.ScalarAffineFunction{Float64}([], 0.0) - Base.sizehint!(lhs.terms, nv) - # replaces direct sum because of MOI and MutableArithmetic slow sums - for j in 1:nv - push!(lhs.terms, MOI.ScalarAffineTerm(hessian_scaling * dot(atom, as.atoms[j]), λ[j])) - end - rhs = -dot(atom, as.b) - MOI.add_constraint(o, lhs, MOI.EqualTo(rhs)) - end - MOI.set(o, MOI.ObjectiveFunction{typeof(sum_of_variables)}(), sum_of_variables) - MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE) - MOI.optimize!(o) - if MOI.get(o, MOI.TerminationStatus()) ∉ (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) - return as - end - indices_to_remove = Int[] - new_weights = R[] - for idx in eachindex(λ) - weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) - if weight_value <= 2 * weight_purge_threshold_default(typeof(weight_value)) - push!(indices_to_remove, idx) - else - push!(new_weights, weight_value) - end - end - deleteat!(as.active_set, indices_to_remove) - @assert length(as) == length(new_weights) - update_weights!(as.active_set, new_weights) - active_set_cleanup!(as) - @assert all(>=(0), new_weights) - active_set_renormalize!(as) - compute_active_set_iterate!(as) - return as +function _compute_quadratic_constraint_term(atom1, A::Union{Identity,LinearAlgebra.UniformScaling}, atom2, λ) + return MOI.ScalarAffineTerm(A.λ * fast_dot(atom1, atom2), λ) end struct LogScheduler{T} diff --git a/test/as_quadratic_projection.jl b/test/as_quadratic_projection.jl new file mode 100644 index 000000000..2060d1997 --- /dev/null +++ b/test/as_quadratic_projection.jl @@ -0,0 +1,116 @@ +using FrankWolfe +using LinearAlgebra +using Random + +import HiGHS +import MathOptInterface as MOI +using Test + +n = Int(1e2) +k = 10000 + +# s = rand(1:100) +s = 10 +@info "Seed $s" 
+Random.seed!(s) + + +xpi = rand(n); +total = sum(xpi); + +xp = xpi ./ total; + +f(x) = norm(x - xp)^2 +function grad!(storage, x) + @. storage = 2 * (x - xp) +end + +# lmo = FrankWolfe.KSparseLMO(5, 1000.0) + +## other LMOs to try +# lmo_big = FrankWolfe.KSparseLMO(100, big"1.0") +# lmo = FrankWolfe.LpNormLMO{Float64,5}(100.0) +# lmo = FrankWolfe.ProbabilitySimplexOracle(100.0); +lmo = FrankWolfe.UnitSimplexOracle(10000.0); + +x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) + return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set))) + end +end + + +trajectoryBPCG_standard = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + callback=build_callback(trajectoryBPCG_standard), +); + + +active_set_quadratic_automatic_standard = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([(1.0, copy(x00))]), + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=1), +) +trajectoryBPCG_quadratic_automatic_standard = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic_standard, + max_iteration=k, + callback=build_callback(trajectoryBPCG_quadratic_automatic_standard), +); + +active_set_quadratic_automatic = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=1), +) +trajectoryBPCG_quadratic_automatic = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic, + max_iteration=k, + callback=build_callback(trajectoryBPCG_quadratic_automatic), +); + +active_set_quadratic_manual = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([(1.0, copy(x00))]), + 2.0 * I, -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=1), +) +trajectoryBPCG_quadratic_manual = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_manual, + max_iteration=k, + callback=build_callback(trajectoryBPCG_quadratic_manual), +); + +traj_data = [ + trajectoryBPCG_standard, + trajectoryBPCG_quadratic_automatic_standard, + trajectoryBPCG_quadratic_automatic, + trajectoryBPCG_quadratic_manual, +] + +# all should have converged +for traj in traj_data + @test traj[end][2] ≤ 1e-8 + @test traj[end][4] ≤ 1e-7 +end diff --git a/test/runtests.jl b/test/runtests.jl index 40f5ac366..112582329 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -805,6 +805,13 @@ using Test end end +module LpDirectSolveTestProjection +using Test +@testset "LP solving for quadratic functions and active set" begin + include("as_quadratic_projection.jl") +end +end + module SparsifyingActiveSetTest using Test @testset "Sparsifying active set" begin From e805431823dbe5ec513ef8430fa79232d131a7dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Designolle?= <55443987+sebastiendesignolle@users.noreply.github.com> Date: Sun, 3 Nov 2024 08:45:05 +0100 Subject: [PATCH 14/21] Refactor ActiveSetQuadratic and 
SymmetricXXX (#524) * ActiveSetQuadratic -> ActiveSetQuadraticCachedProducts * SymmetricLMO -> SubspaceLMO * reduce -> deflate * SymmetricArray -> SubspaceVector * Cap length of test strings to 50 characters * Suppress verbosity * Fix docstrings for Subspace LMO & Vector * ActiveSetQuadraticCachedProducts -> ActiveSetQuadraticProductCaching --- docs/src/advanced.md | 4 +- examples/birkhoff_polytope.jl | 2 +- examples/birkhoff_polytope_symmetric.jl | 32 +++---- examples/blended_pairwise_with_direct.jl | 4 +- examples/docs_12_quadratic_symmetric.jl | 52 ++++++------ examples/quadratic.jl | 2 +- examples/quadratic_A.jl | 2 +- examples/reynolds.jl | 4 +- examples/symmetric.jl | 14 ++-- src/abstract_oracles.jl | 25 +++--- src/active_set_quadratic.jl | 50 +++++------ src/active_set_quadratic_direct_solve.jl | 6 +- src/pairwise.jl | 6 +- src/types.jl | 71 ++++++++-------- test/active_set.jl | 12 +-- test/active_set_variants.jl | 8 +- test/alternating_methods_tests.jl | 18 ++-- test/as_quadratic_projection.jl | 2 +- test/bcg_direction_error.jl | 4 +- test/benchmark.jl | 1 - test/blocks.jl | 4 +- test/extra_storage.jl | 6 +- test/function_gradient.jl | 2 +- test/generic-arrays.jl | 10 +-- test/lmo.jl | 44 +++++----- test/momentum_memory.jl | 4 +- test/oddities.jl | 14 ++-- test/quadratic_lp_active_set.jl | 6 +- test/rational_test.jl | 4 +- test/runtests.jl | 83 +++++++++---------- test/sparsifying_activeset.jl | 2 +- test/timeout.jl | 6 +- test/tracking.jl | 8 +- test/trajectory_tests/open_loop_parametric.jl | 8 +- test/utils.jl | 12 +-- 35 files changed, 271 insertions(+), 261 deletions(-) diff --git a/docs/src/advanced.md b/docs/src/advanced.md index a404fad85..ea438ca74 100644 --- a/docs/src/advanced.md +++ b/docs/src/advanced.md @@ -53,7 +53,7 @@ See [Extra-lazification](@ref) for a complete example. ## Specialized active set for quadratic functions -If the objective function is quadratic, a considerable speedup can be obtained by using the structure `ActiveSetQuadratic`. +If the objective function is quadratic, a considerable speedup can be obtained by using the structure `ActiveSetQuadraticProductCaching`. It relies on the storage of various scalar products to efficiently determine the best (and worst for `blended_pairwise_conditional_gradient`) atom in the active set without the need of computing many scalar products in each iteration. The user should provide the Hessian matrix `A` as well as the linear part `b` of the function, such that: ```math @@ -228,6 +228,6 @@ This subspace is the image of the Reynolds operator defined by \mathcal{R}(x)=\frac{1}{|G|}\sum_{g\in G}g\cdot x. ``` -In practice, the type `SymmetricLMO` allows the user to provide the Reynolds operator $\mathcal{R}$ as well as its adjoint $\mathcal{R}^\ast$. +In practice, the type `SubspaceLMO` allows the user to provide the Reynolds operator $\mathcal{R}$ as well as its adjoint $\mathcal{R}^\ast$. The gradient is symmetrised with $\mathcal{R}^\ast$, then passed to the non-symmetric LMO, and the resulting output is symmetrised with $\mathcal{R}$. In many cases, the gradient is already symmetric so that `reynolds_adjoint(gradient, lmo) = gradient` is a fast and valid choice. 
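
To make the deflation mechanism concrete, here is a minimal standalone Julia sketch (an illustration only, not part of the patch) for the group {identity, transpose}: the Reynolds operator averages a matrix with its transpose, its adjoint can be taken as the identity because only symmetric gradients are passed in, and the inner nuclear-norm LMO is an arbitrary placeholder choice:

using FrankWolfe, LinearAlgebra

reynolds(v, lmo) = (v + v') / 2  # R(x) = (x + xᵀ)/2 projects onto symmetric matrices
reynolds_adjoint(g, lmo) = g     # valid here since the gradients passed in are symmetric

lmo_inner = FrankWolfe.NuclearNormLMO(1.0)  # placeholder inner LMO over matrices
lmo_sym = FrankWolfe.SubspaceLMO(lmo_inner, reynolds, reynolds_adjoint)

A = randn(5, 5)
v = FrankWolfe.compute_extreme_point(lmo_sym, (A + A') / 2)
v ≈ v'  # true: the returned atom lies in the symmetric subspace
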
diff --git a/examples/birkhoff_polytope.jl b/examples/birkhoff_polytope.jl index e181ba69b..c81771ca7 100644 --- a/examples/birkhoff_polytope.jl +++ b/examples/birkhoff_polytope.jl @@ -54,7 +54,7 @@ FrankWolfe.benchmark_oracles( x -> cf(x, xp, normxp2), (str, x) -> cgrad!(str, x, xp), lmo, - FrankWolfe.ActiveSetQuadratic([(1.0, collect(x0))], 2I/n^2, -2xp/n^2); # surprisingly faster and more memory efficient with collect + FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, collect(x0))], 2I/n^2, -2xp/n^2); # surprisingly faster and more memory efficient with collect max_iteration=k, line_search=FrankWolfe.Shortstep(2/n^2), lazy=true, diff --git a/examples/birkhoff_polytope_symmetric.jl b/examples/birkhoff_polytope_symmetric.jl index db1c79a55..8e1c3933d 100644 --- a/examples/birkhoff_polytope_symmetric.jl +++ b/examples/birkhoff_polytope_symmetric.jl @@ -36,7 +36,7 @@ x0 = FrankWolfe.compute_extreme_point(lmo_nat, randn(n, n)) x -> cf(x, xp, normxp2), (str, x) -> cgrad!(str, x, xp), lmo_nat, - FrankWolfe.ActiveSetQuadratic([(1.0, x0)], 2I/n^2, -2xp/n^2); + FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, x0)], 2I/n^2, -2xp/n^2); max_iteration=k, line_search=FrankWolfe.Shortstep(2/n^2), lazy=true, @@ -48,15 +48,15 @@ x0 = FrankWolfe.compute_extreme_point(lmo_nat, randn(n, n)) # here the problem is invariant under mirror symmetry around the diagonal and the anti-diagonal # each solution of the LMO can then be added to the active set together with its orbit # on top of that, the effective dimension of the space is reduced -# the following function constructs the functions `reduce` and `inflate` needed for SymmetricLMO -# `reduce` maps a matrix to the invariant vector space +# the following function constructs the functions `deflate` and `inflate` needed for SubspaceLMO +# `deflate` maps a matrix to the invariant vector space # `inflate` maps a vector in this space back to a matrix -# using `FrankWolfe.SymmetricArray` is a convenience to avoid reallocating the result of `inflate` -function build_reduce_inflate(p::Matrix{T}) where {T <: Number} +# using `FrankWolfe.SubspaceVector` is a convenience to avoid reallocating the result of `inflate` +function build_deflate_inflate(p::Matrix{T}) where {T <: Number} n = size(p, 1) @assert n == size(p, 2) # square matrix - dimension = floor(Int, (n+1)^2 / 4) # reduced dimension - function reduce(A::AbstractMatrix{T}, lmo) + dimension = floor(Int, (n+1)^2 / 4) # deflated dimension + function deflate(A::AbstractMatrix{T}, lmo) vec = Vector{T}(undef, dimension) cnt = 0 @inbounds for i in 1:(n+1)÷2, j in i:n+1-i @@ -75,9 +75,9 @@ function build_reduce_inflate(p::Matrix{T}) where {T <: Number} end end end - return FrankWolfe.SymmetricArray(A, vec) + return FrankWolfe.SubspaceVector(A, vec) end - function inflate(x::FrankWolfe.SymmetricArray, lmo) + function inflate(x::FrankWolfe.SubspaceVector, lmo) cnt = 0 @inbounds for i in 1:(n+1)÷2, j in i:n+1-i cnt += 1 @@ -102,22 +102,22 @@ function build_reduce_inflate(p::Matrix{T}) where {T <: Number} end return x.data end - return reduce, inflate + return deflate, inflate end -reduce, inflate = build_reduce_inflate(xpi) -const rxp = reduce(xpi, nothing) -@assert dot(rxp, rxp) ≈ normxp2 # should be correct thanks to the factors sqrt(2) and 2 in reduce and inflate +deflate, inflate = build_deflate_inflate(xpi) +const rxp = deflate(xpi, nothing) +@assert dot(rxp, rxp) ≈ normxp2 # should be correct thanks to the factors sqrt(2) and 2 in deflate and inflate -lmo_sym = FrankWolfe.SymmetricLMO(lmo_nat, reduce, inflate) 
+lmo_sym = FrankWolfe.SubspaceLMO(lmo_nat, deflate, inflate)
 
-rx0 = FrankWolfe.compute_extreme_point(lmo_sym, reduce(sparse(randn(n, n)), nothing))
+rx0 = FrankWolfe.compute_extreme_point(lmo_sym, deflate(sparse(randn(n, n)), nothing))
 
 @time rx, rv, rprimal, rdual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     x -> cf(x, rxp, normxp2),
     (str, x) -> cgrad!(str, x, rxp),
     lmo_sym,
-    FrankWolfe.ActiveSetQuadratic([(1.0, rx0)], 2I/n^2, -2rxp/n^2);
+    FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, rx0)], 2I/n^2, -2rxp/n^2);
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2/n^2),
     lazy=true,
diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl
index 9a30bde76..828436201 100644
--- a/examples/blended_pairwise_with_direct.jl
+++ b/examples/blended_pairwise_with_direct.jl
@@ -63,7 +63,7 @@ trajectoryBPCG_standard = []
 
 # Just projection quadratic
 trajectoryBPCG_quadratic = []
-as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp)
+as_quad = FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp)
 @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     f,
     grad!,
@@ -75,7 +75,7 @@ as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I,
     callback=build_callback(trajectoryBPCG_quadratic),
 );
 
-as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp)
+as_quad = FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp)
 
 # with quadratic active set
 trajectoryBPCG_quadratic_as = []
diff --git a/examples/docs_12_quadratic_symmetric.jl b/examples/docs_12_quadratic_symmetric.jl
index 82ca5bd12..160c7e86e 100644
--- a/examples/docs_12_quadratic_symmetric.jl
+++ b/examples/docs_12_quadratic_symmetric.jl
@@ -1,7 +1,7 @@
 # # Accelerations for quadratic functions and symmetric problems
 
-# This example illustrates how to exploit symmetry to reduce the dimension of the problem via `SymmetricLMO`.
-# Moreover, active set based algorithms can be accelerated by using the specialized structure `ActiveSetQuadratic`.
+# This example illustrates how to exploit symmetry to reduce the dimension of the problem via `SubspaceLMO`.
+# Moreover, active set based algorithms can be accelerated by using the specialized structure `ActiveSetQuadraticProductCaching`.
 # The specific problem we consider here comes from quantum information and some context can be found [here](https://arxiv.org/abs/2302.04721).
 
 # Formally, we want to find the distance between a tensor of size `m^N` and the `N`-partite local polytope which is defined by its vertices
@@ -124,11 +124,11 @@ println() #hide
 
 # A first acceleration can be obtained by using the active set specialized for the quadratic objective function,
 # whose gradient is here ``x-p``, explaining the hessian and linear part provided as arguments.
-# The speedup is obtained by pre-computing some scalar products to quickly obtained, in each iteration, the best and worst
-# atoms currently in the active set.
+# The speedup is obtained by pre-computing some scalar products to quickly obtain, in each iteration,
+# the best and worst atoms currently in the active set.
-FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_naive, FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide -asq_naive = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p) +FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_naive, FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide +asq_naive = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p) @time FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_naive, asq_naive; verbose, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration) println() #hide @@ -165,9 +165,9 @@ println() #hide # a very small speedup by precomputing and storing `Combinatorics.permutations(1:N)` # in a dedicated field of our custom LMO. -lmo_permutedims = FrankWolfe.SymmetricLMO(lmo_naive, reynolds_permutedims) -FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_permutedims, FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide -asq_permutedims = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p) +lmo_permutedims = FrankWolfe.SubspaceLMO(lmo_naive, reynolds_permutedims) +FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_permutedims, FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide +asq_permutedims = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p) @time FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_permutedims, asq_permutedims; verbose, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration) println() #hide @@ -197,9 +197,9 @@ function build_reynolds_unique(p::Array{T, N}) where {T <: Number, N} end end -lmo_unique = FrankWolfe.SymmetricLMO(lmo_naive, build_reynolds_unique(p)) -FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_unique, FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide -asq_unique = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p) +lmo_unique = FrankWolfe.SubspaceLMO(lmo_naive, build_reynolds_unique(p)) +FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_unique, FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide +asq_unique = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p) @time FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_unique, asq_unique; verbose, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration) println() #hide @@ -214,7 +214,7 @@ println() #hide # accelerations, especially for active set based algorithms in the regime where many lazy iterations are performed. # We refer to the example `symmetric.jl` for a small benchmark with symmetric matrices. 
-function build_reduce_inflate(p::Array{T, N}) where {T <: Number, N} +function build_deflate_inflate(p::Array{T, N}) where {T <: Number, N} ptol = round.(p; digits=8) ptol[ptol .== zero(T)] .= zero(T) # transform -0.0 into 0.0 as isequal(0.0, -0.0) is false uniquetol = unique(ptol[:]) @@ -227,8 +227,8 @@ function build_reduce_inflate(p::Array{T, N}) where {T <: Number, N} for (i, ind) in enumerate(indices) vec[i] = sum(A[ind]) / sqmul[i] end - return FrankWolfe.SymmetricArray(A, vec) - end, function(x::FrankWolfe.SymmetricArray, lmo) + return FrankWolfe.SubspaceVector(A, vec) + end, function(x::FrankWolfe.SubspaceVector, lmo) for (i, ind) in enumerate(indices) @view(x.data[ind]) .= x.vec[i] / sqmul[i] end @@ -236,16 +236,16 @@ function build_reduce_inflate(p::Array{T, N}) where {T <: Number, N} end end -reduce, inflate = build_reduce_inflate(p) -p_reduce = reduce(p, nothing) -x0_reduce = reduce(x0, nothing) -f_reduce = let p_reduce = p_reduce, normp2 = normp2 - x -> LinearAlgebra.dot(x, x) / 2 - LinearAlgebra.dot(p_reduce, x) + normp2 +deflate, inflate = build_deflate_inflate(p) +p_deflate = deflate(p, nothing) +x0_deflate = deflate(x0, nothing) +f_deflate = let p_deflate = p_deflate, normp2 = normp2 + x -> LinearAlgebra.dot(x, x) / 2 - LinearAlgebra.dot(p_deflate, x) + normp2 end -grad_reduce! = let p_reduce = p_reduce +grad_deflate! = let p_deflate = p_deflate (storage, x) -> begin @inbounds for i in eachindex(x) - storage[i] = x[i] - p_reduce[i] + storage[i] = x[i] - p_deflate[i] end end end @@ -255,9 +255,9 @@ println() #hide # In this simple example, their shape remains unchanged, but in general this may need some # reformulation, which falls to the user. -lmo_reduce = FrankWolfe.SymmetricLMO(lmo_naive, reduce, inflate) -FrankWolfe.blended_pairwise_conditional_gradient(f_reduce, grad_reduce!, lmo_reduce, FrankWolfe.ActiveSetQuadratic([(one(T), x0_reduce)], LinearAlgebra.I, -p_reduce); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide -asq_reduce = FrankWolfe.ActiveSetQuadratic([(one(T), x0_reduce)], LinearAlgebra.I, -p_reduce) -@time FrankWolfe.blended_pairwise_conditional_gradient(f_reduce, grad_reduce!, lmo_reduce, asq_reduce; verbose, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration) +lmo_deflate = FrankWolfe.SubspaceLMO(lmo_naive, deflate, inflate) +FrankWolfe.blended_pairwise_conditional_gradient(f_deflate, grad_deflate!, lmo_deflate, FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0_deflate)], LinearAlgebra.I, -p_deflate); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide +asq_deflate = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0_deflate)], LinearAlgebra.I, -p_deflate) +@time FrankWolfe.blended_pairwise_conditional_gradient(f_deflate, grad_deflate!, lmo_deflate, asq_deflate; verbose, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration) println() #hide diff --git a/examples/quadratic.jl b/examples/quadratic.jl index 923fbc4f3..450ea0901 100644 --- a/examples/quadratic.jl +++ b/examples/quadratic.jl @@ -74,7 +74,7 @@ function benchmark_Bell(p::Array{T, 2}, quadratic::Bool; fw_method=FrankWolfe.bl lmo = BellCorrelationsLMOHeuristic{T}(size(p, 1), zeros(T, size(p, 1))) x0 = FrankWolfe.compute_extreme_point(lmo, -p) if quadratic - active_set = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], I, -p) + active_set = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], I, -p) else active_set = FrankWolfe.ActiveSet([(one(T), x0)]) end diff --git 
a/examples/quadratic_A.jl b/examples/quadratic_A.jl index 5ac879c47..c65643a82 100644 --- a/examples/quadratic_A.jl +++ b/examples/quadratic_A.jl @@ -48,7 +48,7 @@ x0 = FrankWolfe.compute_extreme_point(lmo, zeros(T, n+1)) # active_set = FrankWolfe.ActiveSet([(1.0, x0)]) # specialized active set, automatically detecting the parameters A and b of the quadratic function f -active_set = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], gradf) +active_set = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], gradf) @time res = FrankWolfe.blended_pairwise_conditional_gradient( # @time res = FrankWolfe.away_frank_wolfe( diff --git a/examples/reynolds.jl b/examples/reynolds.jl index fe28bca53..0cb6f3c32 100644 --- a/examples/reynolds.jl +++ b/examples/reynolds.jl @@ -86,12 +86,12 @@ function benchmark_Bell(p::Array{T, 3}, sym::Bool; kwargs...) where {T <: Number end lmo = BellCorrelationsLMO{T}(size(p, 1), zeros(T, size(p, 1))) if sym - lmo = FrankWolfe.SymmetricLMO(lmo, reynolds_permutedims, reynolds_adjoint) + lmo = FrankWolfe.SubspaceLMO(lmo, reynolds_permutedims, reynolds_adjoint) end x0 = FrankWolfe.compute_extreme_point(lmo, -p) println("Output type of the LMO: ", typeof(x0)) active_set = FrankWolfe.ActiveSet([(one(T), x0)]) - # active_set = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], I, -p) + # active_set = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], I, -p) return FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo, active_set; lazy=true, line_search=FrankWolfe.Shortstep(one(T)), kwargs...) end diff --git a/examples/symmetric.jl b/examples/symmetric.jl index 000ca53bc..1d1018cf9 100644 --- a/examples/symmetric.jl +++ b/examples/symmetric.jl @@ -57,7 +57,7 @@ function correlation_tensor_GHZ_polygon(N::Int, m::Int; type=Float64) return res end -function build_reduce_inflate_permutedims(p::Array{T, 2}) where {T <: Number} +function build_deflate_inflate_permutedims(p::Array{T, 2}) where {T <: Number} n = size(p, 1) @assert n == size(p, 2) dimension = (n * (n + 1)) ÷ 2 @@ -72,8 +72,8 @@ function build_reduce_inflate_permutedims(p::Array{T, 2}) where {T <: Number} vec[cnt+j] = (A[i, j] + A[j, i]) / sqrt2 end end - return FrankWolfe.SymmetricArray(A, vec) - end, function(x::FrankWolfe.SymmetricArray, lmo) + return FrankWolfe.SubspaceVector(A, vec) + end, function(x::FrankWolfe.SubspaceVector, lmo) cnt = 0 @inbounds for i in 1:n x.data[i, i] = x.vec[i] @@ -90,9 +90,9 @@ end function benchmark_Bell(p::Array{T, 2}, sym::Bool; fw_method=FrankWolfe.blended_pairwise_conditional_gradient, kwargs...) 
where {T <: Number} Random.seed!(0) if sym - reduce, inflate = build_reduce_inflate_permutedims(p) - lmo = FrankWolfe.SymmetricLMO(BellCorrelationsLMOHeuristic{T}(size(p, 1), zeros(T, size(p, 1))), reduce, inflate) - p = reduce(p, lmo) + deflate, inflate = build_deflate_inflate_permutedims(p) + lmo = FrankWolfe.SubspaceLMO(BellCorrelationsLMOHeuristic{T}(size(p, 1), zeros(T, size(p, 1))), deflate, inflate) + p = deflate(p, lmo) else lmo = BellCorrelationsLMOHeuristic{T}(size(p, 1), zeros(T, size(p, 1))) end @@ -109,7 +109,7 @@ function benchmark_Bell(p::Array{T, 2}, sym::Bool; fw_method=FrankWolfe.blended_ end end x0 = FrankWolfe.compute_extreme_point(lmo, -p) - active_set = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], I, -p) + active_set = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], I, -p) res = fw_method(f, grad!, lmo, active_set; line_search=FrankWolfe.Shortstep(one(T)), lazy=true, verbose=false, max_iteration=10^2) return fw_method(f, grad!, lmo, res[6]; line_search=FrankWolfe.Shortstep(one(T)), lazy=true, lazy_tolerance=10^6, kwargs...) end diff --git a/src/abstract_oracles.jl b/src/abstract_oracles.jl index 5b61960f0..c1c1154e4 100644 --- a/src/abstract_oracles.jl +++ b/src/abstract_oracles.jl @@ -258,23 +258,28 @@ function compute_extreme_point( end """ - SymmetricLMO{LMO, TR, TI} + SubspaceLMO{LMO, TD, TI} -Symmetric LMO for the reduction operator defined by `TR` +Subspace LMO for the deflation operator defined by `TD` and the inflation operator defined by `TI`. -Computations are performed in the reduced subspace, and the +Computations are performed in the deflated subspace, and the effective call of the LMO first inflates the gradient, then -use the non-symmetric LMO, and finally reduces the output. +uses the full LMO, and finally deflates the output. + +Deflation operators typically project onto symmetric subspaces +or select relevant elements of a full tensor. +Scalar products should be treated carefully to ensure correctness +of the results; see also the companion `SubspaceVector` structure. """ -struct SymmetricLMO{LMO<:LinearMinimizationOracle,TR,TI} <: LinearMinimizationOracle +struct SubspaceLMO{LMO<:LinearMinimizationOracle,TD,TI} <: LinearMinimizationOracle lmo::LMO - reduce::TR + deflate::TD inflate::TI - function SymmetricLMO(lmo::LMO, reduce, inflate=(x, lmo) -> x) where {LMO<:LinearMinimizationOracle} - return new{typeof(lmo),typeof(reduce),typeof(inflate)}( lmo, reduce, inflate) + function SubspaceLMO(lmo::LMO, deflate, inflate=(x, lmo) -> x) where {LMO<:LinearMinimizationOracle} + return new{typeof(lmo),typeof(deflate),typeof(inflate)}( lmo, deflate, inflate) end end -function compute_extreme_point(sym::SymmetricLMO, direction; kwargs...) - return sym.reduce(compute_extreme_point(sym.lmo, sym.inflate(direction, sym.lmo)), sym.lmo) +function compute_extreme_point(sym::SubspaceLMO, direction; kwargs...) + return sym.deflate(compute_extreme_point(sym.lmo, sym.inflate(direction, sym.lmo)), sym.lmo) end diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index 08f8e4e96..0b19d7dba 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -1,6 +1,6 @@ """ - ActiveSetQuadratic{AT, R, IT} + ActiveSetQuadraticProductCaching{AT, R, IT} Represents an active set of extreme vertices collected in a FW algorithm, along with their coefficients `(λ_i, a_i)`. @@ -9,7 +9,7 @@ The iterate `x = ∑λ_i a_i` is stored in x with type `IT`. 
The objective function is assumed to be of the form `f(x)=½⟨x,Ax⟩+⟨b,x⟩+c` so that the gradient is simply `∇f(x)=Ax+b`. """ -struct ActiveSetQuadratic{AT, R <: Real, IT, H} <: AbstractActiveSet{AT,R,IT} +struct ActiveSetQuadraticProductCaching{AT, R <: Real, IT, H} <: AbstractActiveSet{AT,R,IT} weights::Vector{R} atoms::Vector{AT} x::IT @@ -48,21 +48,21 @@ function detect_quadratic_function(grad!, x0; test=true) return A, b end -function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function) where {AT,R} +function ActiveSetQuadraticProductCaching(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function) where {AT,R} A, b = detect_quadratic_function(grad!, tuple_values[1][2]) - return ActiveSetQuadratic(tuple_values, A, b) + return ActiveSetQuadraticProductCaching(tuple_values, A, b) end -function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b) where {AT,R,H} - return ActiveSetQuadratic{AT,R}(tuple_values, A, b) +function ActiveSetQuadraticProductCaching(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b) where {AT,R,H} + return ActiveSetQuadraticProductCaching{AT,R}(tuple_values, A, b) end -function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function) where {AT,R} +function ActiveSetQuadraticProductCaching{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function) where {AT,R} A, b = detect_quadratic_function(grad!, tuple_values[1][2]) - return ActiveSetQuadratic{AT,R}(tuple_values, A, b) + return ActiveSetQuadraticProductCaching{AT,R}(tuple_values, A, b) end -function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b) where {AT,R,H} +function ActiveSetQuadraticProductCaching{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b) where {AT,R,H} n = length(tuple_values) weights = Vector{R}(undef, n) atoms = Vector{AT}(undef, n) @@ -76,7 +76,7 @@ function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number, atoms[idx] = tuple_values[idx][2] end x = similar(b) - as = ActiveSetQuadratic{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified) + as = ActiveSetQuadraticProductCaching{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified) reset_quadratic_dots!(as) compute_active_set_iterate!(as) return as @@ -84,7 +84,7 @@ end # should only be called upon construction # for active sets with a large number of atoms, this function becomes very costly -function reset_quadratic_dots!(as::ActiveSetQuadratic{AT,R}) where {AT,R} +function reset_quadratic_dots!(as::ActiveSetQuadraticProductCaching{AT,R}) where {AT,R} @inbounds for idx in 1:length(as) as.dots_A[idx] = Vector{R}(undef, idx) for idy in 1:idx @@ -108,16 +108,16 @@ function Base.:*(a::Identity, b) end end -function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b) where {AT,R} - return ActiveSetQuadratic(tuple_values, Identity(A.λ), b) +function ActiveSetQuadraticProductCaching(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b) where {AT,R} + return ActiveSetQuadraticProductCaching(tuple_values, Identity(A.λ), b) end -function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b) where {AT,R} - return ActiveSetQuadratic{AT,R}(tuple_values, Identity(A.λ), b) +function ActiveSetQuadraticProductCaching{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b) where {AT,R} + 
return ActiveSetQuadraticProductCaching{AT,R}(tuple_values, Identity(A.λ), b) end # these three functions do not update the active set iterate -function Base.push!(as::ActiveSetQuadratic{AT,R}, (λ, a)) where {AT,R} +function Base.push!(as::ActiveSetQuadraticProductCaching{AT,R}, (λ, a)) where {AT,R} dot_x = zero(R) dot_A = Vector{R}(undef, length(as)) dot_b = fast_dot(as.b, a) @@ -140,7 +140,7 @@ function Base.push!(as::ActiveSetQuadratic{AT,R}, (λ, a)) where {AT,R} end # TODO multi-indices version -function Base.deleteat!(as::ActiveSetQuadratic, idx::Int) +function Base.deleteat!(as::ActiveSetQuadraticProductCaching, idx::Int) @inbounds for i in 1:idx-1 as.dots_x[i] -= as.weights_prev[idx] * as.dots_A[idx][i] end @@ -158,7 +158,7 @@ function Base.deleteat!(as::ActiveSetQuadratic, idx::Int) return as end -function Base.empty!(as::ActiveSetQuadratic) +function Base.empty!(as::ActiveSetQuadraticProductCaching) empty!(as.atoms) empty!(as.weights) as.x .= 0 @@ -171,7 +171,7 @@ function Base.empty!(as::ActiveSetQuadratic) end function active_set_update!( - active_set::ActiveSetQuadratic{AT,R}, + active_set::ActiveSetQuadraticProductCaching{AT,R}, lambda, atom, renorm=true, idx=nothing; weight_purge_threshold=weight_purge_threshold_default(R), add_dropped_vertices=false, @@ -200,7 +200,7 @@ function active_set_update!( return active_set end -function active_set_renormalize!(active_set::ActiveSetQuadratic) +function active_set_renormalize!(active_set::ActiveSetQuadraticProductCaching) renorm = sum(active_set.weights) active_set.weights ./= renorm active_set.weights_prev ./= renorm @@ -209,7 +209,7 @@ function active_set_renormalize!(active_set::ActiveSetQuadratic) return active_set end -function active_set_argmin(active_set::ActiveSetQuadratic, direction) +function active_set_argmin(active_set::ActiveSetQuadraticProductCaching, direction) valm = typemax(eltype(direction)) idxm = -1 idx_modified = findall(active_set.modified) @@ -240,7 +240,7 @@ function active_set_argmin(active_set::ActiveSetQuadratic, direction) return (active_set[idxm]..., idxm) end -function active_set_argminmax(active_set::ActiveSetQuadratic, direction; Φ=0.5) +function active_set_argminmax(active_set::ActiveSetQuadraticProductCaching, direction; Φ=0.5) valm = typemax(eltype(direction)) valM = typemin(eltype(direction)) idxm = -1 @@ -281,7 +281,7 @@ function active_set_argminmax(active_set::ActiveSetQuadratic, direction; Φ=0.5) end # in-place warm-start of a quadratic active set for A and b -function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R}, A::H, b) where {AT,R,H} +function update_active_set_quadratic!(warm_as::ActiveSetQuadraticProductCaching{AT,R}, A::H, b) where {AT,R,H} @inbounds for idx in eachindex(warm_as) for idy in 1:idx warm_as.dots_A[idx][idy] = fast_dot(A * warm_as.atoms[idx], warm_as.atoms[idy]) @@ -292,7 +292,7 @@ function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R}, A::H, b end # in-place warm-start of a quadratic active set for b -function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R,IT,H}, b) where {AT,R,IT,H} +function update_active_set_quadratic!(warm_as::ActiveSetQuadraticProductCaching{AT,R,IT,H}, b) where {AT,R,IT,H} warm_as.dots_x .= 0 warm_as.weights_prev .= 0 warm_as.modified .= true @@ -304,7 +304,7 @@ function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R,IT,H}, b) return warm_as end -function update_weights!(as::ActiveSetQuadratic, new_weights) +function update_weights!(as::ActiveSetQuadraticProductCaching, new_weights) 
as.weights_prev .= as.weights as.weights .= new_weights as.modified .= true diff --git a/src/active_set_quadratic_direct_solve.jl b/src/active_set_quadratic_direct_solve.jl index c90ab0a8e..4a560377d 100644 --- a/src/active_set_quadratic_direct_solve.jl +++ b/src/active_set_quadratic_direct_solve.jl @@ -55,7 +55,7 @@ end """ ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, A, b, lp_optimizer) -Creates an `ActiveSetQuadraticLinearSolve` from the given Hessian `A`, linear term `b` and `lp_optimizer` by creating an inner `ActiveSetQuadratic` active set. +Creates an `ActiveSetQuadraticLinearSolve` from the given Hessian `A`, linear term `b` and `lp_optimizer` by creating an inner `ActiveSetQuadraticProductCaching` active set. """ function ActiveSetQuadraticLinearSolve( tuple_values::Vector{Tuple{R,AT}}, @@ -64,7 +64,7 @@ function ActiveSetQuadraticLinearSolve( lp_optimizer; scheduler=LogScheduler(), ) where {AT,R,H} - inner_as = ActiveSetQuadratic(tuple_values, A, b) + inner_as = ActiveSetQuadraticProductCaching(tuple_values, A, b) return ActiveSetQuadraticLinearSolve( inner_as.weights, inner_as.atoms, @@ -155,7 +155,7 @@ function ActiveSetQuadraticLinearSolve{AT,R}( lp_optimizer; scheduler=LogScheduler(), ) where {AT,R,H} - inner_as = ActiveSetQuadratic{AT,R}(tuple_values, A, b) + inner_as = ActiveSetQuadraticProductCaching{AT,R}(tuple_values, A, b) as = ActiveSetQuadraticLinearSolve{AT,R,typeof(x),H}( inner_as.weights, inner_as.atoms, diff --git a/src/pairwise.jl b/src/pairwise.jl index 0843d8039..5a4e7c7e9 100644 --- a/src/pairwise.jl +++ b/src/pairwise.jl @@ -261,7 +261,7 @@ function pairwise_frank_wolfe( active_set, -gamma, away_vertex, - true, + false, away_index, add_dropped_vertices=use_extra_vertex_storage, vertex_storage=extra_vertex_storage, @@ -273,9 +273,11 @@ function pairwise_frank_wolfe( end end end - # fw update + # fw update active_set_update!(active_set, gamma, fw_vertex, renorm, fw_index) end + # println(active_set.weights) + # println([atom[1] for atom in active_set.atoms]) if callback !== nothing state = CallbackState( diff --git a/src/types.jl b/src/types.jl index 0a00c2d16..e5bc9b266 100644 --- a/src/types.jl +++ b/src/types.jl @@ -256,55 +256,60 @@ Base.@propagate_inbounds function muladd_memory_mode(::InplaceEmphasis, x::Matri end """ - SymmetricArray{T, DT} + SubspaceVector{HasMultiplicities, T} +Companion structure of `SubspaceLMO` containing three fields: +- `data` is the full structure to be deflated, +- `vec` is a vector in the reduced subspace in which computations are performed, +- `mul` is only used to compute scalar products when `HasMultiplicities = true`, +which should be avoided (when possible) for performance reasons. 
""" -struct SymmetricArray{HasMultiplicities,T,DT,V<:AbstractVector{T}} <: AbstractVector{T} +struct SubspaceVector{HasMultiplicities,T,DT,V<:AbstractVector{T}} <: AbstractVector{T} data::DT # full array to be symmetrised, will generally fall out of sync wrt vec vec::V # vector representing the array mul::Vector{T} # only used for scalar products end -function SymmetricArray(data::DT, vec::V) where {DT,V<:AbstractVector{T}} where {T} - return SymmetricArray{false,T,DT,V}(data, vec, T[]) +function SubspaceVector(data::DT, vec::V) where {DT,V<:AbstractVector{T}} where {T} + return SubspaceVector{false,T,DT,V}(data, vec, T[]) end -function SymmetricArray(data::DT, vec::V, mul::Vector) where {DT,V<:AbstractVector{T}} where {T} - return SymmetricArray{true,T,DT,V}(data, vec, convert(Vector{T}, mul)) +function SubspaceVector(data::DT, vec::V, mul::Vector) where {DT,V<:AbstractVector{T}} where {T} + return SubspaceVector{true,T,DT,V}(data, vec, convert(Vector{T}, mul)) end -Base.@propagate_inbounds function Base.getindex(A::SymmetricArray, i) +Base.@propagate_inbounds function Base.getindex(A::SubspaceVector, i) @boundscheck checkbounds(A.vec, i) return @inbounds getindex(A.vec, i) end -Base.@propagate_inbounds function Base.setindex!(A::SymmetricArray, x, i) +Base.@propagate_inbounds function Base.setindex!(A::SubspaceVector, x, i) @boundscheck checkbounds(A.vec, i) return @inbounds setindex!(A.vec, x, i) end -Base.size(A::SymmetricArray) = size(A.vec) -Base.eltype(A::SymmetricArray) = eltype(A.vec) -Base.similar(A::SymmetricArray{true}) = SymmetricArray(similar(A.data), similar(A.vec), A.mul) -Base.similar(A::SymmetricArray{false}) = SymmetricArray(similar(A.data), similar(A.vec)) -Base.similar(A::SymmetricArray{true}, ::Type{T}) where {T} = SymmetricArray(similar(A.data, T), similar(A.vec, T), convert(Vector{T}, A.mul)) -Base.similar(A::SymmetricArray{false}, ::Type{T}) where {T} = SymmetricArray(similar(A.data, T), similar(A.vec, T)) -Base.collect(A::SymmetricArray{true}) = SymmetricArray(collect(A.data), collect(A.vec), A.mul) -Base.collect(A::SymmetricArray{false}) = SymmetricArray(collect(A.data), collect(A.vec)) -Base.copyto!(dest::SymmetricArray, src::SymmetricArray) = copyto!(dest.vec, src.vec) -Base.:*(scalar::Real, A::SymmetricArray{true}) = SymmetricArray(A.data, scalar * A.vec, A.mul) -Base.:*(scalar::Real, A::SymmetricArray{false}) = SymmetricArray(A.data, scalar * A.vec) -Base.:*(A::SymmetricArray, scalar::Real) = scalar * A -Base.:/(A::SymmetricArray, scalar::Real) = inv(scalar) * A -Base.:+(A1::SymmetricArray{true,T}, A2::SymmetricArray{true,T}) where {T} = SymmetricArray(A1.data, A1.vec + A2.vec, A1.mul) -Base.:+(A1::SymmetricArray{false,T}, A2::SymmetricArray{false,T}) where {T} = SymmetricArray(A1.data, A1.vec + A2.vec) -Base.:-(A1::SymmetricArray{true,T}, A2::SymmetricArray{true,T}) where {T} = SymmetricArray(A1.data, A1.vec - A2.vec, A1.mul) -Base.:-(A1::SymmetricArray{false,T}, A2::SymmetricArray{false,T}) where {T} = SymmetricArray(A1.data, A1.vec - A2.vec) -Base.:-(A::SymmetricArray{true,T}) where {T} = SymmetricArray(A.data, -A.vec, A.mul) -Base.:-(A::SymmetricArray{false,T}) where {T} = SymmetricArray(A.data, -A.vec) - -LinearAlgebra.dot(A1::SymmetricArray{true}, A2::SymmetricArray{true}) = dot(A1.vec, Diagonal(A1.mul), A2.vec) -LinearAlgebra.dot(A1::SymmetricArray{false}, A2::SymmetricArray{false}) = dot(A1.vec, A2.vec) -LinearAlgebra.norm(A::SymmetricArray) = sqrt(dot(A, A)) - -Base.@propagate_inbounds Base.isequal(A1::SymmetricArray, A2::SymmetricArray) = 
isequal(A1.vec, A2.vec) +Base.size(A::SubspaceVector) = size(A.vec) +Base.eltype(A::SubspaceVector) = eltype(A.vec) +Base.similar(A::SubspaceVector{true}) = SubspaceVector(similar(A.data), similar(A.vec), A.mul) +Base.similar(A::SubspaceVector{false}) = SubspaceVector(similar(A.data), similar(A.vec)) +Base.similar(A::SubspaceVector{true}, ::Type{T}) where {T} = SubspaceVector(similar(A.data, T), similar(A.vec, T), convert(Vector{T}, A.mul)) +Base.similar(A::SubspaceVector{false}, ::Type{T}) where {T} = SubspaceVector(similar(A.data, T), similar(A.vec, T)) +Base.collect(A::SubspaceVector{true}) = SubspaceVector(collect(A.data), collect(A.vec), A.mul) +Base.collect(A::SubspaceVector{false}) = SubspaceVector(collect(A.data), collect(A.vec)) +Base.copyto!(dest::SubspaceVector, src::SubspaceVector) = copyto!(dest.vec, src.vec) +Base.:*(scalar::Real, A::SubspaceVector{true}) = SubspaceVector(A.data, scalar * A.vec, A.mul) +Base.:*(scalar::Real, A::SubspaceVector{false}) = SubspaceVector(A.data, scalar * A.vec) +Base.:*(A::SubspaceVector, scalar::Real) = scalar * A +Base.:/(A::SubspaceVector, scalar::Real) = inv(scalar) * A +Base.:+(A1::SubspaceVector{true,T}, A2::SubspaceVector{true,T}) where {T} = SubspaceVector(A1.data, A1.vec + A2.vec, A1.mul) +Base.:+(A1::SubspaceVector{false,T}, A2::SubspaceVector{false,T}) where {T} = SubspaceVector(A1.data, A1.vec + A2.vec) +Base.:-(A1::SubspaceVector{true,T}, A2::SubspaceVector{true,T}) where {T} = SubspaceVector(A1.data, A1.vec - A2.vec, A1.mul) +Base.:-(A1::SubspaceVector{false,T}, A2::SubspaceVector{false,T}) where {T} = SubspaceVector(A1.data, A1.vec - A2.vec) +Base.:-(A::SubspaceVector{true,T}) where {T} = SubspaceVector(A.data, -A.vec, A.mul) +Base.:-(A::SubspaceVector{false,T}) where {T} = SubspaceVector(A.data, -A.vec) + +LinearAlgebra.dot(A1::SubspaceVector{true}, A2::SubspaceVector{true}) = dot(A1.vec, Diagonal(A1.mul), A2.vec) +LinearAlgebra.dot(A1::SubspaceVector{false}, A2::SubspaceVector{false}) = dot(A1.vec, A2.vec) +LinearAlgebra.norm(A::SubspaceVector) = sqrt(dot(A, A)) + +Base.@propagate_inbounds Base.isequal(A1::SubspaceVector, A2::SubspaceVector) = isequal(A1.vec, A2.vec) diff --git a/test/active_set.jl b/test/active_set.jl index 101b17d07..29c98f62a 100644 --- a/test/active_set.jl +++ b/test/active_set.jl @@ -4,7 +4,7 @@ using FrankWolfe import FrankWolfe: ActiveSet using LinearAlgebra: norm -@testset "Active sets" begin +@testset "Active sets " begin @testset "Constructors and eltypes" begin active_set = ActiveSet([(0.1, [1, 2, 3]), (0.9, [2, 3, 4]), (0.0, [5, 6, 7])]) @@ -100,7 +100,7 @@ using LinearAlgebra: norm end end -@testset "Simplex gradient descent" begin +@testset "Simplex gradient descent " begin # Gradient descent over a 2-D unit simplex # each atom is a vertex, direction points to [1,1] # note: integers for atom element types @@ -175,7 +175,7 @@ end end end -@testset "LP separation oracle" begin +@testset "LP separation oracle " begin # Gradient descent over a L-inf ball of radius one # current active set contains 3 vertices # direction points to [1,1] @@ -212,7 +212,7 @@ end ) end -@testset "Argminmax" begin +@testset "Argminmax " begin active_set = FrankWolfe.ActiveSet([(0.6, [-1, -1]), (0.2, [0, 1]), (0.2, [1, 0])]) (λ_min, a_min, i_min, val, λ_max, a_max, i_max, valM, progress) = FrankWolfe.active_set_argminmax(active_set, [1, 1.5]) @@ -221,7 +221,7 @@ end @test_throws ErrorException FrankWolfe.active_set_argminmax(active_set, [NaN, NaN]) end -@testset "LPseparationWithScaledHotVector" begin +@testset 
"LPseparationWithScaledHotVector " begin v1 = FrankWolfe.ScaledHotVector(1, 1, 2) v2 = FrankWolfe.ScaledHotVector(1, 2, 2) v3 = FrankWolfe.ScaledHotVector(0, 2, 2) @@ -240,7 +240,7 @@ end ) end -@testset "ActiveSet for BigFloat" begin +@testset "ActiveSet for BigFloat " begin n = Int(1e2) lmo = FrankWolfe.LpNormLMO{BigFloat,1}(rand()) x0 = Vector(FrankWolfe.compute_extreme_point(lmo, zeros(n))) diff --git a/test/active_set_variants.jl b/test/active_set_variants.jl index 59b4a1cad..c191389f7 100644 --- a/test/active_set_variants.jl +++ b/test/active_set_variants.jl @@ -9,7 +9,7 @@ function test_callback(state, active_set, args...) @test grad0 ≈ state.gradient end -@testset "Testing active set Frank-Wolfe variants, BPFW, AFW, and PFW, including their lazy versions" begin +@testset "Testing active set based Frank-Wolfe variants " begin f(x) = norm(x)^2 function grad!(storage, x) @. storage = 2x @@ -134,7 +134,7 @@ end ) end -@testset "recompute or not last vertex" begin +@testset "Recompute or not last vertex " begin n = 10 f(x) = norm(x)^2 function grad!(storage, x) @@ -171,7 +171,7 @@ end @test lmo.counter == prev_counter - 1 end -@testset "Quadratic active set" begin +@testset "Quadratic active set " begin n = 2 # number of dimensions p = 10 # number of points function simple_reg_loss(θ, data_point) @@ -218,7 +218,7 @@ end end lmo = FrankWolfe.LpNormLMO{Float64, 2}(1.05 * norm(params_perfect)) x0 = FrankWolfe.compute_extreme_point(lmo, zeros(Float64, n+1)) - active_set = FrankWolfe.ActiveSetQuadratic([(1.0, x0)], gradf) + active_set = FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, x0)], gradf) res = FrankWolfe.blended_pairwise_conditional_gradient( f, gradf, diff --git a/test/alternating_methods_tests.jl b/test/alternating_methods_tests.jl index 7bf9487a6..ddbd2bb8a 100644 --- a/test/alternating_methods_tests.jl +++ b/test/alternating_methods_tests.jl @@ -19,7 +19,7 @@ lmo1 = FrankWolfe.ScaledBoundLInfNormBall(-ones(n), zeros(n)) lmo2 = FrankWolfe.ScaledBoundLInfNormBall(zeros(n), ones(n)) lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) -@testset "Testing alternating linear minimization with block coordinate FW for different LMO-pairs " begin +@testset "Testing ALM with block-coordinate FW " begin x, _ = FrankWolfe.alternating_linear_minimization( FrankWolfe.block_coordinate_frank_wolfe, @@ -102,7 +102,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) (storage, x) -> storage .= zero(x), (lmo1, lmo3), ones(n), - verbose=true, + verbose=false, line_search=FrankWolfe.Shortstep(2), ) @@ -116,7 +116,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) (lmo1, lmo_prob), ones(n), trajectory=true, - verbose=true, + verbose=false, ) @test abs(x.blocks[1][1]) < 1e-6 @@ -142,10 +142,10 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) end -@testset "Testing different update orders for block coordinate FW in within alternating linear minimization" begin +@testset "Testing update orders for block-coordinate ALM-FW " begin orders = [ - FrankWolfe.FullUpdate(), + FrankWolfe.FullUpdate(), [FrankWolfe.CyclicUpdate(i) for i in [-1, 1, 2]]..., [FrankWolfe.StochasticUpdate(i) for i in [-1, 1, 2]]..., [FrankWolfe.DualGapOrder(i) for i in [-1, 1, 2]]..., @@ -168,7 +168,7 @@ end end end -@testset "Testing alternating linear minimization with different FW methods" begin +@testset "Testing ALM with different FW methods " begin methods = [ FrankWolfe.frank_wolfe, @@ -191,7 +191,7 @@ end end end -@testset "Testing block-coordinate FW with 
different update steps and linesearch strategies" begin +@testset "Testing stepsize/linesearch in block-coordinate FW" begin x, _, _, _, _ = FrankWolfe.alternating_linear_minimization( FrankWolfe.block_coordinate_frank_wolfe, @@ -256,9 +256,9 @@ end @test abs(x.blocks[2][1] - 1 / n) < 1e-6 end -@testset "Testing alternating projections for different LMO-pairs " begin +@testset "Testing alternating projections " begin - x, _, _, _, _ = FrankWolfe.alternating_projections((lmo1, lmo_prob), rand(n), verbose=true) + x, _, _, _, _ = FrankWolfe.alternating_projections((lmo1, lmo_prob), rand(n), verbose=false) @test abs(x.blocks[1][1]) < 1e-6 @test abs(x.blocks[2][1] - 1 / n) < 1e-6 diff --git a/test/as_quadratic_projection.jl b/test/as_quadratic_projection.jl index 2060d1997..66f76e488 100644 --- a/test/as_quadratic_projection.jl +++ b/test/as_quadratic_projection.jl @@ -11,7 +11,7 @@ k = 10000 # s = rand(1:100) s = 10 -@info "Seed $s" +# @info "Seed $s" Random.seed!(s) diff --git a/test/bcg_direction_error.jl b/test/bcg_direction_error.jl index 6c1ab0dba..b0fa28b89 100644 --- a/test/bcg_direction_error.jl +++ b/test/bcg_direction_error.jl @@ -33,7 +33,7 @@ x, v, primal, dual_gap, _, _ = FrankWolfe.blended_conditional_gradient( line_search=FrankWolfe.AdaptiveZerothOrder(L_est=2.0), print_iter=100, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, trajectory=false, lazy_tolerance=1.0, weight_purge_threshold=1e-9, @@ -55,7 +55,7 @@ x, v, primal_cut, dual_gap, _, _ = FrankWolfe.blended_conditional_gradient( line_search=FrankWolfe.AdaptiveZerothOrder(L_est=2.0), print_iter=k / 10, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, trajectory=false, lazy_tolerance=1.0, weight_purge_threshold=1e-10, diff --git a/test/benchmark.jl b/test/benchmark.jl index 38cc72253..0b6f13063 100644 --- a/test/benchmark.jl +++ b/test/benchmark.jl @@ -1,7 +1,6 @@ import FrankWolfe import LinearAlgebra - n = Int(1e6); xpi = rand(n); diff --git a/test/blocks.jl b/test/blocks.jl index 485716bde..ab2909ecb 100644 --- a/test/blocks.jl +++ b/test/blocks.jl @@ -4,7 +4,7 @@ using LinearAlgebra using FrankWolfe using SparseArrays -@testset "Block array behaviour" begin +@testset "Block array behavior " begin arr = FrankWolfe.BlockVector([ [ 1 3 @@ -53,7 +53,7 @@ using SparseArrays end -@testset "FrankWolfe array methods" begin +@testset "Frank-Wolfe array methods " begin @testset "Block arrays" begin mem = FrankWolfe.InplaceEmphasis() arr0 = FrankWolfe.BlockVector([ diff --git a/test/extra_storage.jl b/test/extra_storage.jl index 26b90055a..1465bceb6 100644 --- a/test/extra_storage.jl +++ b/test/extra_storage.jl @@ -11,7 +11,7 @@ function grad!(storage, x) return storage .= x .- center0 end -@testset "Blended Pairwise Conditional Gradient" begin +@testset "Blended Pairwise Conditional Gradient " begin lmo = FrankWolfe.UnitSimplexOracle(4.3) tlmo = FrankWolfe.TrackingLMO(lmo) @@ -59,7 +59,7 @@ end end end -@testset "Away-Frank-Wolfe" begin +@testset "Away-Frank-Wolfe " begin lmo = FrankWolfe.UnitSimplexOracle(4.3) tlmo = FrankWolfe.TrackingLMO(lmo) @@ -107,7 +107,7 @@ end end end -@testset "Blended Pairwise Conditional Gradient" begin +@testset "Blended Pairwise Conditional Gradient " begin lmo = FrankWolfe.UnitSimplexOracle(4.3) tlmo = FrankWolfe.TrackingLMO(lmo) diff --git a/test/function_gradient.jl b/test/function_gradient.jl index 4bfd1a734..01b10a7b0 100644 --- a/test/function_gradient.jl +++ b/test/function_gradient.jl @@ -7,7 +7,7 @@ import FrankWolfe: compute_gradient, 
compute_value, compute_value_gradient Random.seed!(123) -@testset "Simple and stochastic function gradient interface" begin +@testset "Simple and stochastic function gradient interface " begin for n in (1, 5, 20) A = rand(Float16, n, n) A .+= A' diff --git a/test/generic-arrays.jl b/test/generic-arrays.jl index eab5107a9..53e0a8a17 100644 --- a/test/generic-arrays.jl +++ b/test/generic-arrays.jl @@ -8,7 +8,7 @@ function Base.convert(::Type{GenericArray{Float64,1}}, v::FrankWolfe.ScaledHotVe return GenericArray(collect(v)) end -@testset "GenericArray is maintained" begin +@testset "GenericArray is maintained " begin n = Int(1e3) k = n @@ -29,7 +29,7 @@ end max_iteration=k, line_search=FrankWolfe.Agnostic(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) @@ -44,7 +44,7 @@ end max_iteration=k, line_search=FrankWolfe.Agnostic(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), VType=FrankWolfe.ScaledHotVector{Float64}, ) @@ -60,12 +60,12 @@ end max_iteration=k, line_search=FrankWolfe.Agnostic(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) end -@testset "fast_equal special types" begin +@testset "Testing fast_equal for special types " begin @testset "ScaledHotVector" begin a = FrankWolfe.ScaledHotVector(3.5, 3, 10) b = FrankWolfe.ScaledHotVector(3.5, 3, 11) diff --git a/test/lmo.jl b/test/lmo.jl index de36e591a..f690b9736 100644 --- a/test/lmo.jl +++ b/test/lmo.jl @@ -14,7 +14,7 @@ import Clp import Hypatia using JuMP -@testset "Simplex LMOs" begin +@testset "Simplex LMOs " begin n = 6 direction = zeros(6) rhs = 10 * rand() @@ -60,7 +60,7 @@ using JuMP end end -@testset "Hypersimplex" begin +@testset "Hypersimplex " begin @testset "Hypersimplex $n $K" for n in (2, 5, 10), K in (1, min(n, 4)) direction = randn(n) hypersimplex = FrankWolfe.HyperSimplexOracle(K, 3.0) @@ -80,7 +80,7 @@ end end end -@testset "Lp-norm epigraph LMO" begin +@testset "Lp-norm epigraph LMO " begin for n in (1, 2, 5, 10) τ = 5 + 3 * rand() # tests that the "special" p behaves like the "any" p, i.e. 
2.0 and 2 @@ -123,7 +123,7 @@ end end end -@testset "K-sparse polytope LMO" begin +@testset "K-sparse polytope LMO " begin @testset "$n-dimension" for n in (1, 2, 10) τ = 5 + 3 * rand() for K in 1:n @@ -153,7 +153,7 @@ end @test norm(v) > 0 end -@testset "Caching on simplex LMOs" begin +@testset "Caching on simplex LMOs " begin n = 6 direction = zeros(6) rhs = 10 * rand() @@ -203,7 +203,7 @@ function _is_doubly_stochastic(m) end end -@testset "Birkhoff polytope" begin +@testset "Birkhoff polytope " begin Random.seed!(42) lmo = FrankWolfe.BirkhoffPolytopeLMO() for n in (1, 2, 10) @@ -247,7 +247,7 @@ end ] end -@testset "Matrix completion and nuclear norm" begin +@testset "Matrix completion and nuclear norm " begin nfeat = 50 nobs = 100 r = 5 @@ -314,7 +314,7 @@ end ) end -@testset "Spectral norms" begin +@testset "Spectral norms " begin Random.seed!(42) o = Hypatia.Optimizer() MOI.set(o, MOI.Silent(), true) @@ -403,7 +403,7 @@ end end end -@testset "MOI oracle consistency" begin +@testset "MOI oracle consistency " begin Random.seed!(42) o = GLPK.Optimizer() MOI.set(o, MOI.Silent(), true) @@ -538,7 +538,7 @@ end end end -@testset "MOI oracle on Birkhoff polytope" begin +@testset "MOI oracle on Birkhoff polytope " begin o = GLPK.Optimizer() o_ref = GLPK.Optimizer() for n in (1, 2, 10) @@ -576,7 +576,7 @@ end end end -@testset "MOI oracle and KSparseLMO" begin +@testset "MOI oracle and KSparseLMO " begin o_base = GLPK.Optimizer() cached = MOI.Utilities.CachingOptimizer( MOI.Utilities.UniversalFallback(MOI.Utilities.Model{Float64}()), @@ -640,7 +640,7 @@ end end end -@testset "Product LMO" begin +@testset "Product LMO " begin lmo = FrankWolfe.ProductLMO(FrankWolfe.LpNormLMO{Inf}(3.0), FrankWolfe.LpNormLMO{1}(2.0)) dinf = randn(10) d1 = randn(5) @@ -659,7 +659,7 @@ end @test FrankWolfe.BlockVector([vinf, v1]) == v_block end -@testset "Scaled L-1 norm polytopes" begin +@testset "Scaled L-1 norm polytopes " begin lmo = FrankWolfe.ScaledBoundL1NormBall(-ones(10), ones(10)) # equivalent to LMO lmo_ref = FrankWolfe.LpNormLMO{1}(1) @@ -691,7 +691,7 @@ end @test v ≈ [-1, 0] end -@testset "Scaled L-inf norm polytopes" begin +@testset "Scaled L-inf norm polytopes " begin # tests ScaledBoundLInfNormBall for the standard hypercube, a shifted one, and a scaled one lmo = FrankWolfe.ScaledBoundLInfNormBall(-ones(10), ones(10)) lmo_ref = FrankWolfe.LpNormLMO{Inf}(1) @@ -736,7 +736,7 @@ end end end -@testset "Copy MathOpt LMO" begin +@testset "Copy MathOpt LMO " begin o_clp = MOI.Utilities.CachingOptimizer( MOI.Utilities.UniversalFallback(MOI.Utilities.Model{Float64}()), Clp.Optimizer(), @@ -758,7 +758,7 @@ end end end -@testset "MathOpt LMO with BlockVector" begin +@testset "MathOpt LMO with BlockVector " begin o = GLPK.Optimizer() MOI.set(o, MOI.Silent(), true) x = MOI.add_variables(o, 10) @@ -777,7 +777,7 @@ end end -@testset "Inplace LMO correctness" begin +@testset "Inplace LMO correctness " begin V = [-6.0, -6.15703, -5.55986] M = [3.0 2.8464949 2.4178848; 2.8464949 3.0 2.84649498; 2.4178848 2.84649498 3.0] @@ -801,7 +801,7 @@ end @test x_dense == x_standard end -@testset "Ellipsoid LMO $n" for n in (2, 5, 10) +@testset "Ellipsoid LMO $n " for n in (2, 5, 9) A = zeros(n, n) A[1, 1] = 3 @test_throws PosDefException FrankWolfe.EllipsoidLMO(A) @@ -833,7 +833,7 @@ end @test dot(xv, d) ≈ dot(v, d) atol = 1e-5 * n end -@testset "Convex hull" begin +@testset "Convex hull " begin lmo = FrankWolfe.ConvexHullOracle([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) for _ in 1:100 d = randn(3) @@ -846,7 +846,7 @@ end @test v == 
lmo.vertices[1] end -@testset "Symmetric LMO" begin +@testset "Symmetric LMO " begin # See examples/reynolds.jl struct BellCorrelationsLMO{T} <: FrankWolfe.LinearMinimizationOracle m::Int # number of inputs @@ -912,7 +912,7 @@ end end reynolds_adjoint(gradient, lmo) = gradient lmo = BellCorrelationsLMO{Float64}(size(p, 1), zeros(size(p, 1))) - sym = FrankWolfe.SymmetricLMO(lmo, reynolds_permutedims, reynolds_adjoint) + sym = FrankWolfe.SubspaceLMO(lmo, reynolds_permutedims, reynolds_adjoint) x0 = FrankWolfe.compute_extreme_point(sym, -p) active_set = FrankWolfe.ActiveSet([(1.0, x0)]) res = FrankWolfe.blended_pairwise_conditional_gradient( @@ -927,7 +927,7 @@ end @test length(res[6]) < 25 end -@testset "Ordered Weighted Norm LMO" begin +@testset "Ordered Weighted Norm LMO " begin Random.seed!(4321) N = Int(1e3) for _ in 1:10 diff --git a/test/momentum_memory.jl b/test/momentum_memory.jl index 45fa98001..5ec6aea50 100644 --- a/test/momentum_memory.jl +++ b/test/momentum_memory.jl @@ -8,7 +8,7 @@ n = Int(1e3) k = 10000 s = rand(1:100) -@info "Seed: $s" +# @info "Seed: $s" Random.seed!(s) @@ -47,7 +47,7 @@ xmem, _ = FrankWolfe.frank_wolfe( max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, trajectory=true, momentum=0.9, ) diff --git a/test/oddities.jl b/test/oddities.jl index 466a5019f..b64ecb901 100644 --- a/test/oddities.jl +++ b/test/oddities.jl @@ -2,7 +2,7 @@ import FrankWolfe using LinearAlgebra using Test -@testset "Testing adaptive LS when already optimal and gradient is 0" begin +@testset "Testing adaptive LS at optimality " begin f(x) = norm(x)^2 function grad!(storage, x) return storage .= 2x @@ -19,7 +19,7 @@ using Test x0, max_iteration=1000, line_search=FrankWolfe.Agnostic(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 @@ -32,7 +32,7 @@ using Test x0, max_iteration=1000, line_search=FrankWolfe.AdaptiveZerothOrder(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 @test abs( @@ -43,7 +43,7 @@ using Test x0, max_iteration=1000, line_search=FrankWolfe.Adaptive(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 @@ -58,7 +58,7 @@ using Test max_iteration=1000, lazy=true, line_search=FrankWolfe.AdaptiveZerothOrder(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 @test abs( @@ -70,7 +70,7 @@ using Test max_iteration=1000, lazy=true, line_search=FrankWolfe.Adaptive(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 @@ -83,7 +83,7 @@ using Test x0, max_iteration=1000, line_search=FrankWolfe.Adaptive(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 diff --git a/test/quadratic_lp_active_set.jl b/test/quadratic_lp_active_set.jl index a87b37cea..50bcdb6e6 100644 --- a/test/quadratic_lp_active_set.jl +++ b/test/quadratic_lp_active_set.jl @@ -9,7 +9,7 @@ n = Int(1e4) k = 5000 s = 10 -@info "Seed $s" +# @info "Seed $s" Random.seed!(s) xpi = rand(n); @@ -78,7 +78,7 @@ x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( as_quad_direct_generic, max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), - verbose=true, + verbose=false, callback=build_callback(trajectoryBPCG_quadratic_direct_generic), ); @@ -97,7 +97,7 @@ x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( as_quad_direct_basic_as, max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), - verbose=true, + verbose=false, callback=build_callback(trajectoryBPCG_quadratic_noqas), ); diff --git a/test/rational_test.jl b/test/rational_test.jl index e9725157a..415b33291 100644 --- 
a/test/rational_test.jl +++ b/test/rational_test.jl @@ -37,7 +37,7 @@ xmem, vmem, primal, dual_gap, trajectory = FrankWolfe.frank_wolfe( max_iteration=k, line_search=FrankWolfe.Agnostic(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) @@ -49,7 +49,7 @@ xstep, _ = FrankWolfe.frank_wolfe( max_iteration=k, line_search=FrankWolfe.Shortstep(2 // 1), print_iter=k / 10, - verbose=true, + verbose=false, ) @test eltype(xmem) == eltype(xstep) == Rational{BigInt} diff --git a/test/runtests.jl b/test/runtests.jl index 112582329..3a2bd298e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -10,7 +10,7 @@ include("utils.jl") include("active_set_variants.jl") include("alternating_methods_tests.jl") -@testset "Testing vanilla Frank-Wolfe with various step size and momentum strategies" begin +@testset "Testing vanilla Frank-Wolfe " begin f(x) = norm(x)^2 function grad!(storage, x) return storage .= 2x @@ -25,7 +25,7 @@ include("alternating_methods_tests.jl") x0, max_iteration=10000, line_search=FrankWolfe.GeneralizedAgnostic(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 g(x) = 2 + log(1 + log(x + 1)) @@ -37,7 +37,7 @@ include("alternating_methods_tests.jl") x0, max_iteration=10000, line_search=FrankWolfe.GeneralizedAgnostic(g), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -48,7 +48,7 @@ include("alternating_methods_tests.jl") x0, max_iteration=1000, line_search=FrankWolfe.Agnostic(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -71,7 +71,7 @@ include("alternating_methods_tests.jl") x0, max_iteration=1000, line_search=FrankWolfe.Goldenratio(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -155,12 +155,12 @@ include("alternating_methods_tests.jl") ) < 1.0e-3 end -@testset "Gradient with momentum correctly updated" begin +@testset "Gradient with momentum correctly updated " begin # fixing https://github.com/ZIB-IOL/FrankWolfe.jl/issues/47 include("momentum_memory.jl") end -@testset "Testing Lazified Conditional Gradients with various step size strategies" begin +@testset "Testing Lazified Conditional Gradients " begin f(x) = norm(x)^2 function grad!(storage, x) @. 
storage = 2x @@ -176,7 +176,7 @@ end x0, max_iteration=1000, line_search=FrankWolfe.Goldenratio(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -187,7 +187,7 @@ end x0, max_iteration=1000, line_search=FrankWolfe.Backtracking(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -198,7 +198,7 @@ end x0, max_iteration=1000, line_search=FrankWolfe.Shortstep(2.0), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -209,12 +209,12 @@ end x0, max_iteration=1000, line_search=FrankWolfe.AdaptiveZerothOrder(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 end -@testset "Testing Lazified Conditional Gradients with cache strategies" begin +@testset "Testing caching in Lazified Conditional Gradients " begin n = Int(1e5) L = 2 k = 1000 @@ -235,7 +235,7 @@ end x0, max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), - verbose=true, + verbose=false, ) @test primal - 1 / n <= bound @@ -268,7 +268,7 @@ end @test primal - 1 / n <= bound end -@testset "Testing memory_mode blas vs memory" begin +@testset "Testing memory_mode blas vs memory " begin n = Int(1e5) k = 100 xpi = rand(n) @@ -335,7 +335,7 @@ end @test primal < f(x0) end end -@testset "Testing rational variant" begin +@testset "Testing rational variant " begin rhs = 1 n = 40 k = 1000 @@ -377,7 +377,7 @@ end line_search=FrankWolfe.Agnostic(), print_iter=k / 10, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, ) @test eltype(x0) == eltype(x) == Rational{BigInt} @test f(x) <= 1e-4 @@ -393,7 +393,7 @@ end line_search=FrankWolfe.Shortstep(2 // 1), print_iter=k / 100, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, ) x0 = FrankWolfe.compute_extreme_point(lmo, direction) @@ -406,11 +406,11 @@ end line_search=FrankWolfe.Shortstep(2 // 1), print_iter=k / 10, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, ) @test eltype(x) == Rational{BigInt} end -@testset "Multi-precision tests" begin +@testset "Multi-precision tests " begin rhs = 1 n = 100 k = 1000 @@ -470,7 +470,7 @@ end line_search=FrankWolfe.AdaptiveZerothOrder(), print_iter=k / 10, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, ) @test eltype(x0) == T @@ -485,7 +485,7 @@ end line_search=FrankWolfe.AdaptiveZerothOrder(), print_iter=k / 10, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, ) @test eltype(x0) == T @@ -494,7 +494,7 @@ end end end -@testset "Stochastic FW linear regression" begin +@testset "Stochastic FW linear regression " begin function simple_reg_loss(θ, data_point) (xi, yi) = data_point (a, b) = (θ[1:end-1], θ[end]) @@ -581,7 +581,7 @@ end ) end -@testset "Away-step FW" begin +@testset "Away-step FW " begin n = 50 lmo_prob = FrankWolfe.ProbabilitySimplexOracle(1.0) x0 = FrankWolfe.compute_extreme_point(lmo_prob, rand(n)) @@ -612,7 +612,7 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.OutplaceEmphasis(), ) @@ -627,7 +627,7 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.OutplaceEmphasis(), ) @@ -643,7 +643,7 @@ end away_steps=false, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.OutplaceEmphasis(), ) @@ -658,7 +658,7 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, 
memory_mode=FrankWolfe.OutplaceEmphasis(), ) @@ -673,7 +673,7 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) @@ -689,7 +689,7 @@ end away_steps=false, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) @@ -704,7 +704,7 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) @@ -720,12 +720,12 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.OutplaceEmphasis(), ) end -@testset "Blended conditional gradient" begin +@testset "Testing Blended Conditional Gradient " begin n = 50 lmo_prob = FrankWolfe.ProbabilitySimplexOracle(1.0) x0 = FrankWolfe.compute_extreme_point(lmo_prob, randn(n)) @@ -743,7 +743,7 @@ end x0, max_iteration=k, line_search=FrankWolfe.Backtracking(), - verbose=true, + verbose=false, memory_mode=FrankWolfe.OutplaceEmphasis(), ) @@ -765,56 +765,55 @@ end end - include("oddities.jl") include("tracking.jl") # in separate module for name space issues module BCGDirectionError using Test -@testset "BCG direction accuracy" begin +@testset "BCG direction accuracy " begin include("bcg_direction_error.jl") end end module RationalTest using Test -@testset "Rational test and shortstep" begin +@testset "Rational test and shortstep " begin include("rational_test.jl") end end module BCGAccel using Test -@testset "BCG acceleration with different types" begin +@testset "BCG acceleration with different types " begin include("blended_accelerated.jl") end end module VertexStorageTest using Test -@testset "Vertex storage" begin +@testset "Vertex storage " begin include("extra_storage.jl") end end module LpDirectSolveTest using Test -@testset "LP solving for quadratic functions and active set" begin +@testset "LP solving for quadratic functions and active set " begin include("quadratic_lp_active_set.jl") end end module LpDirectSolveTestProjection using Test -@testset "LP solving for quadratic functions and active set" begin +@testset "LP solving for quadratic functions and active set " begin include("as_quadratic_projection.jl") end end module SparsifyingActiveSetTest using Test -@testset "Sparsifying active set" begin +@testset "Sparsifying active set " begin include("sparsifying_activeset.jl") end end @@ -823,7 +822,7 @@ include("generic-arrays.jl") include("blocks.jl") -@testset "End-to-end trajectory tests" begin +@testset "End-to-end trajectory tests " begin trajectory_testfiles = readdir(joinpath(@__DIR__, "trajectory_tests"), join=true) for file in trajectory_testfiles @eval Module() begin diff --git a/test/sparsifying_activeset.jl b/test/sparsifying_activeset.jl index 1fde16d53..1cf8d8299 100644 --- a/test/sparsifying_activeset.jl +++ b/test/sparsifying_activeset.jl @@ -21,7 +21,7 @@ k = 10000 # s = rand(1:100) s = 10 -@info "Seed $s" +# @info "Seed $s" Random.seed!(s) xpi = rand(n); diff --git a/test/timeout.jl b/test/timeout.jl index 7094ef5b3..9ba8aa47e 100644 --- a/test/timeout.jl +++ b/test/timeout.jl @@ -2,7 +2,7 @@ using FrankWolfe using LinearAlgebra using SparseArrays -@testset "Timing out" begin +@testset "Timing out " begin f(x) = norm(x)^2 function grad!(storage, x) return storage .= 2x @@ -17,7 +17,7 @@ using SparseArrays x0, max_iteration=1000, line_search=FrankWolfe.Agnostic(), - verbose=true, + 
verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -40,7 +40,7 @@ using SparseArrays x0, max_iteration=1000, line_search=FrankWolfe.Goldenratio(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( diff --git a/test/tracking.jl b/test/tracking.jl index ecd7d1823..dac8cb5f7 100644 --- a/test/tracking.jl +++ b/test/tracking.jl @@ -3,7 +3,7 @@ using LinearAlgebra using SparseArrays using FrankWolfe -@testset "Tracking Testset" begin +@testset "Tracking Testset " begin f(x) = norm(x)^2 function grad!(storage, x) return storage .= 2x @@ -41,7 +41,7 @@ using FrankWolfe end end -@testset "Testing vanilla Frank-Wolfe with various step size and momentum strategies" begin +@testset "Testing vanilla Frank-Wolfe " begin f(x) = norm(x)^2 function grad!(storage, x) @@ -68,7 +68,7 @@ end trajectory=true, callback=nothing, traj_data=storage, - verbose=true, + verbose=false, ) @test length(storage[1]) == 5 @@ -79,7 +79,7 @@ end @test tlmo.counter == niters + 1 # x0 computation and initialization end -@testset "Testing lazified Frank-Wolfe with various step size and momentum strategies" begin +@testset "Testing lazified Frank-Wolfe " begin f(x) = norm(x)^2 function grad!(storage, x) diff --git a/test/trajectory_tests/open_loop_parametric.jl b/test/trajectory_tests/open_loop_parametric.jl index 474d98ecb..38b0e5fce 100644 --- a/test/trajectory_tests/open_loop_parametric.jl +++ b/test/trajectory_tests/open_loop_parametric.jl @@ -26,7 +26,7 @@ using LinearAlgebra line_search=FrankWolfe.Agnostic(2), print_iter=k / 10, epsilon=1e-5, - verbose=true, + verbose=false, trajectory=true, ) @@ -39,7 +39,7 @@ using LinearAlgebra line_search=FrankWolfe.Agnostic(10), print_iter=k / 10, epsilon=1e-5, - verbose=true, + verbose=false, trajectory=true, ) @@ -68,7 +68,7 @@ using LinearAlgebra line_search=FrankWolfe.Agnostic(2), print_iter=k / 10, epsilon=1e-5, - verbose=true, + verbose=false, trajectory=true, ) @@ -81,7 +81,7 @@ using LinearAlgebra line_search=FrankWolfe.Agnostic(10), print_iter=k / 10, epsilon=1e-5, - verbose=true, + verbose=false, trajectory=true, ) diff --git a/test/utils.jl b/test/utils.jl index 66ed66e03..3fc795dd1 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -3,7 +3,7 @@ using LinearAlgebra using Test using SparseArrays -@testset "Simple benchmark_oracles function" begin +@testset "Simple benchmark_oracles function " begin n = Int(1e3) xpi = rand(n) @@ -21,7 +21,7 @@ using SparseArrays FrankWolfe.benchmark_oracles(f, grad!, () -> rand(n), lmo_prob; k=100) end -@testset "RankOneMatrix" begin +@testset "RankOneMatrix " begin for n in (1, 2, 5) for _ in 1:5 v = rand(n) @@ -64,7 +64,7 @@ end end end -@testset "RankOne muladd_memory_mode $n" for n in (1, 2, 5) +@testset "RankOne muladd_memory_mode $n " for n in (1, 2, 5) for _ in 1:5 n = 5 v = rand(n) @@ -82,7 +82,7 @@ end end end -@testset "Line Search methods" begin +@testset "Line Search methods " begin a = [-1.0, -1.0, -1.0] b = [1.0, 1.0, 1.0] function grad!(storage, x) @@ -214,14 +214,14 @@ end ) end -@testset "Momentum tests" begin +@testset "Momentum tests " begin it = FrankWolfe.ExpMomentumIterator() it.num = 0 # no momentum -> 1 @test FrankWolfe.momentum_iterate(it) == 1 end -@testset "Fast dot complex & norm" begin +@testset "Fast dot complex & norm " begin s = sparse(I, 3, 3) m = randn(Complex{Float64}, 3, 3) @test dot(s, m) ≈ FrankWolfe.fast_dot(s, m) From 9ab7500175ad46878eef125e0a307ddb9dc93597 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Designolle?= <55443987+sebastiendesignolle@users.noreply.github.com> 
Date: Sun, 3 Nov 2024 08:45:22 +0100 Subject: [PATCH 15/21] Set renorm in the away update to false (#522) --- src/pairwise.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pairwise.jl b/src/pairwise.jl index 5a4e7c7e9..56269f503 100644 --- a/src/pairwise.jl +++ b/src/pairwise.jl @@ -266,7 +266,7 @@ function pairwise_frank_wolfe( add_dropped_vertices=use_extra_vertex_storage, vertex_storage=extra_vertex_storage, ) - if add_dropped_vertices && gamma == gamma_max + if add_dropped_vertices && gamma ≈ gamma_max for vtx in active_set.atoms if vtx != v push!(extra_vertex_storage, vtx) From 3bcaffdfa6fa09fe8e248fdcf219dd1865319a3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Designolle?= <55443987+sebastiendesignolle@users.noreply.github.com> Date: Sun, 3 Nov 2024 18:13:33 +0100 Subject: [PATCH 16/21] Systematically cleanup in fw step (#525) --- src/pairwise.jl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/pairwise.jl b/src/pairwise.jl index 56269f503..1083a92ac 100644 --- a/src/pairwise.jl +++ b/src/pairwise.jl @@ -254,8 +254,6 @@ function pairwise_frank_wolfe( gamma = min(gamma_max, gamma) - # cleanup and renormalize every x iterations. Only for the fw steps. - renorm = mod(t, renorm_interval) == 0 # away update active_set_update!( active_set, @@ -274,7 +272,7 @@ function pairwise_frank_wolfe( end end # fw update - active_set_update!(active_set, gamma, fw_vertex, renorm, fw_index) + active_set_update!(active_set, gamma, fw_vertex, true, fw_index) end # println(active_set.weights) # println([atom[1] for atom in active_set.atoms]) From a8f2996c81991cf15b17961c4dcaaa963bd16f6d Mon Sep 17 00:00:00 2001 From: Hendrych Date: Tue, 5 Nov 2024 13:34:49 +0100 Subject: [PATCH 17/21] Test against DICG as well. --- examples/optimal_experiment_design.jl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/examples/optimal_experiment_design.jl b/examples/optimal_experiment_design.jl index 3ad42d6ca..f60bf50c7 100644 --- a/examples/optimal_experiment_design.jl +++ b/examples/optimal_experiment_design.jl @@ -226,7 +226,13 @@ m = 300 f, grad! = build_a_criterion(A, build_safe=false) x0, active_set = build_start_point(A) domain_oracle = build_domain_oracle(A) - x_s, _, primal, dual_gap, traj_data_s, _ = FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo, active_set, verbose=true, line_search=FrankWolfe.Secant(40, 1e-8, domain_oracle), trajectory=true) + x_s, _, primal, dual_gap, traj_data_s, _ = FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo, active_set, verbose=true, line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) + + lmo = FrankWolfe.ProbabilitySimplexOracle(1.0) + f, grad! = build_a_criterion(A, build_safe=false) + x0, active_set = build_start_point(A) + domain_oracle = build_domain_oracle(A) + x, _, primal, dual_gap, traj_data = decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) @test traj_data_s[end][1] < traj_data[end][1] @test isapprox(f(x_s), f(x)) @@ -244,7 +250,13 @@ m = 300 f, grad! 
= build_d_criterion(A, build_safe=false) x0, active_set = build_start_point(A) domain_oracle = build_domain_oracle(A) - x_s, _, primal, dual_gap, traj_data_s, _ = FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo, active_set, verbose=true, line_search=FrankWolfe.Secant(40, 1e-8, domain_oracle), trajectory=true) + x_s, _, primal, dual_gap, traj_data_s, _ = FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo, active_set, verbose=true, line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) + + lmo = FrankWolfe.ProbabilitySimplexOracle(1.0) + f, grad! = build_d_criterion(A, build_safe=false) + x0, active_set = build_start_point(A) + domain_oracle = build_domain_oracle(A) + x, _, primal, dual_gap, traj_data = decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) @test traj_data_s[end][1] < traj_data[end][1] @test isapprox(f(x_s), f(x)) From d5f1d65a098338a6a410331c95ad02f10ad775b5 Mon Sep 17 00:00:00 2001 From: Hendrych Date: Tue, 5 Nov 2024 15:41:16 +0100 Subject: [PATCH 18/21] Test DICG with Probability Simplex and MOI LMO. --- examples/optimal_experiment_design.jl | 56 ++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/examples/optimal_experiment_design.jl b/examples/optimal_experiment_design.jl index f60bf50c7..0f7129c77 100644 --- a/examples/optimal_experiment_design.jl +++ b/examples/optimal_experiment_design.jl @@ -4,6 +4,10 @@ using Random using Distributions using LinearAlgebra using Statistics +using SCIP +using MathOptInterface +const MOI = MathOptInterface +using SparseArrays using Test # The Optimal Experiment Design Problem consists of choosing a subset of experiments @@ -43,6 +47,29 @@ function build_data(m) return A end +""" +Build MOI version of the lmo. +""" +function build_moi_lmo(m) + o = SCIP.Optimizer() + MOI.empty!(o) + MOI.set(o, MOI.Silent(), true) + + x = MOI.add_variables(o, m) + + for xi in x + # each var has to be non-negative + MOI.add_constraint(o, xi, MOI.GreaterThan(0.0)) + end + + # sum of all variables has to be less than 1.0 + MOI.add_constraint(o, sum(x, init=0.0), MOI.LessThan(1.0)) + + lmo = FrankWolfe.MathOptLMO(o) + + return lmo +end + """ Check if given point is in the domain of f, i.e. X = transpose(A) * diagm(x) * A positive definite. @@ -87,12 +114,11 @@ function build_start_point(A) V = Vector{Float64}[] for i in S - v = zeros(m) - v[i] = 1.0 + v = FrankWolfe.ScaledHotVector(1.0, i, m) push!(V, v) end - x = sum(V .* 1/n) + x = SparseArrays.SparseVector(sum(V .* 1/n)) active_set= FrankWolfe.ActiveSet(fill(1/n, n), V, x) return x, active_set, S @@ -232,10 +258,20 @@ m = 300 f, grad! = build_a_criterion(A, build_safe=false) x0, active_set = build_start_point(A) domain_oracle = build_domain_oracle(A) - x, _, primal, dual_gap, traj_data = decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) + x_d, _, primal, dual_gap, traj_data_d = FrankWolfe.decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) + + lmo = build_moi_lmo(m) + f, grad! 
= build_a_criterion(A, build_safe=false) + x0, active_set = build_start_point(A) + domain_oracle = build_domain_oracle(A) + x_d_m, _, primal, dual_gap, traj_data_d_m = FrankWolfe.decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) @test traj_data_s[end][1] < traj_data[end][1] + @test traj_data_d[end][1] <= traj_data_s[end][1] + @test traj_data_d_m[end][1] <= traj_data_s[end][1] @test isapprox(f(x_s), f(x)) + @test isapprox(f(x_s), f(x_d)) + @test isapprox(f(x_s), f(x_d_m)) end @testset "D-Optimal Design" begin @@ -256,10 +292,20 @@ m = 300 f, grad! = build_d_criterion(A, build_safe=false) x0, active_set = build_start_point(A) domain_oracle = build_domain_oracle(A) - x, _, primal, dual_gap, traj_data = decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) + x_d, _, primal, dual_gap, traj_data_d = FrankWolfe.decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) + + lmo = build_moi_lmo(m) + f, grad! = build_d_criterion(A, build_safe=false) + x0, active_set = build_start_point(A) + domain_oracle = build_domain_oracle(A) + x_d_m, _, primal, dual_gap, traj_data_d_m = FrankWolfe.decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) @test traj_data_s[end][1] < traj_data[end][1] + @test traj_data_d[end][1] <= traj_data_s[end][1] + @test traj_data_d_m[end][1] <= traj_data_s[end][1] @test isapprox(f(x_s), f(x)) + @test isapprox(f(x_s), f(x_d)) + @test isapprox(f(x_s), f(x_d_m)) end end From ad596206a8be02efa58bc38a224c2ac0bd133e87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Wed, 6 Nov 2024 09:13:11 +0100 Subject: [PATCH 19/21] fix typo --- src/dicg.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dicg.jl b/src/dicg.jl index eacf4e707..9b7a749a9 100644 --- a/src/dicg.jl +++ b/src/dicg.jl @@ -124,7 +124,7 @@ function decomposition_invariant_conditional_gradient( println("GRADIENstep_typeYPE: $grad_type LAZY: $lazy lazy_tolerance: $lazy_tolerance") println("LMO: $(typeof(lmo))") if memory_mode isa InplaceEmphasis - @info("In memory_mode memory iterates are wristep_typeen back into x0!") + @info("In memory_mode memory iterates are written back into x0!") end end @@ -350,7 +350,7 @@ function blended_decomposition_invariant_conditional_gradient( println("GRADIENstep_typeYPE: $grad_type LAZY: $lazy lazy_tolerance: $lazy_tolerance") println("LMO: $(typeof(lmo))") if memory_mode isa InplaceEmphasis - @info("In memory_mode memory iterates are wristep_typeen back into x0!") + @info("In memory_mode memory iterates are written back into x0!") end end From be7d5687ae64436cf0d1d312e30b09b8cfc76875 Mon Sep 17 00:00:00 2001 From: Hendrych Date: Thu, 7 Nov 2024 11:42:57 +0100 Subject: [PATCH 20/21] Fix typo. 
--- src/blended_cg.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/blended_cg.jl b/src/blended_cg.jl index eee21f998..f239b5ca9 100644 --- a/src/blended_cg.jl +++ b/src/blended_cg.jl @@ -163,7 +163,7 @@ function blended_conditional_gradient( end if verbose - println("\nBlended Conditional Gradients Algorithm.") + println("\nBlended Conditional Gradient Algorithm.") NumType = eltype(x) println( "MEMORY_MODE: $memory_mode STEPSIZE: $line_search EPSILON: $epsilon MAXITERATION: $max_iteration TYPE: $NumType", From e5daf0a7ce4f773201880ea847eab6ab90f343aa Mon Sep 17 00:00:00 2001 From: Hendrych Date: Thu, 7 Nov 2024 11:43:32 +0100 Subject: [PATCH 21/21] Add BCG to the tests. --- examples/optimal_experiment_design.jl | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/examples/optimal_experiment_design.jl b/examples/optimal_experiment_design.jl index 0f7129c77..62b0ab5cf 100644 --- a/examples/optimal_experiment_design.jl +++ b/examples/optimal_experiment_design.jl @@ -241,6 +241,7 @@ end m = 300 @testset "Limit Optimal Design Problem" begin @testset "A-Optimal Design" begin + println("A_OPTIMAL DESIGN") A = build_data(m) f, grad! = build_a_criterion(A, build_safe=true) lmo = FrankWolfe.ProbabilitySimplexOracle(1.0) @@ -260,18 +261,18 @@ m = 300 domain_oracle = build_domain_oracle(A) x_d, _, primal, dual_gap, traj_data_d = FrankWolfe.decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) - lmo = build_moi_lmo(m) + lmo = FrankWolfe.ProbabilitySimplexOracle(1.0) f, grad! = build_a_criterion(A, build_safe=false) x0, active_set = build_start_point(A) domain_oracle = build_domain_oracle(A) - x_d_m, _, primal, dual_gap, traj_data_d_m = FrankWolfe.decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) + x_b, _, primal, dual_gap, traj_data_b, _ = FrankWolfe.blended_conditional_gradient(f, grad!, lmo, x0, verbose=true, trajectory=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle)) @test traj_data_s[end][1] < traj_data[end][1] @test traj_data_d[end][1] <= traj_data_s[end][1] - @test traj_data_d_m[end][1] <= traj_data_s[end][1] + @test traj_data_b[end][1] <= traj_data_s[end][1] @test isapprox(f(x_s), f(x)) @test isapprox(f(x_s), f(x_d)) - @test isapprox(f(x_s), f(x_d_m)) + @test isapprox(f(x_s), f(x_b)) end @testset "D-Optimal Design" begin @@ -294,18 +295,19 @@ m = 300 domain_oracle = build_domain_oracle(A) x_d, _, primal, dual_gap, traj_data_d = FrankWolfe.decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) - lmo = build_moi_lmo(m) + domain_oracle = build_domain_oracle(A) + lmo = FrankWolfe.ProbabilitySimplexOracle(1.0) f, grad! 
= build_d_criterion(A, build_safe=false) x0, active_set = build_start_point(A) domain_oracle = build_domain_oracle(A) - x_d_m, _, primal, dual_gap, traj_data_d_m = FrankWolfe.decomposition_invariant_conditional_gradient(f, grad!, lmo, x0, verbose=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), trajectory=true) + x_b, _, primal, dual_gap, traj_data_b, _ = FrankWolfe.blended_conditional_gradient(f, grad!, lmo, x0, verbose=true, trajectory=true,line_search=FrankWolfe.Secant(domain_oracle=domain_oracle)) @test traj_data_s[end][1] < traj_data[end][1] @test traj_data_d[end][1] <= traj_data_s[end][1] - @test traj_data_d_m[end][1] <= traj_data_s[end][1] + @test traj_data_b[end][1] <= traj_data_s[end][1] @test isapprox(f(x_s), f(x)) @test isapprox(f(x_s), f(x_d)) - @test isapprox(f(x_s), f(x_d_m)) + @test isapprox(f(x_s), f(x_b)) end end
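
To make the renames and new options in the patches above concrete, two short Julia sketches follow. They are illustrations written against the post-0.4 API visible in these diffs, not part of any patch.

First, the `SymmetricLMO`/`SymmetricArray` to `SubspaceLMO`/`SubspaceVector` rename: a deflation maps full iterates into a smaller subspace where the algorithm runs, and an inflation maps directions back before calling the inner LMO. Only `FrankWolfe.SubspaceLMO`, `FrankWolfe.SubspaceVector`, and the `compute_extreme_point` dispatch come from the diffs; the toy `MatrixSignLMO` (an L∞-ball-style oracle over 2x2 matrices) and the `deflate`/`inflate` pair for symmetric 2x2 matrices are illustrative stand-ins for the `build_deflate_inflate_permutedims` helper in examples/symmetric.jl.

using FrankWolfe
using LinearAlgebra

# Toy inner LMO over full 2x2 matrices: minimizing <D, V> over the unit
# L-inf ball gives V = -sign.(D). Hypothetical, for illustration only.
struct MatrixSignLMO <: FrankWolfe.LinearMinimizationOracle end
function FrankWolfe.compute_extreme_point(::MatrixSignLMO, direction::AbstractMatrix; kwargs...)
    return -sign.(direction)
end

const sqrt2 = sqrt(2.0)

# Deflate a symmetric 2x2 matrix to R^3; the sqrt2 scaling on the
# off-diagonal keeps dot products in the deflated space equal to the
# Frobenius inner product on symmetric matrices, which is the "scalar
# products should be treated carefully" caveat in the SubspaceLMO docstring.
function deflate(M::AbstractMatrix, lmo)
    vec = [M[1, 1], M[2, 2], (M[1, 2] + M[2, 1]) / sqrt2]
    return FrankWolfe.SubspaceVector(copy(M), vec)
end

# Inflate back to the full matrix before calling the inner LMO.
function inflate(x::FrankWolfe.SubspaceVector, lmo)
    x.data[1, 1] = x.vec[1]
    x.data[2, 2] = x.vec[2]
    x.data[1, 2] = x.data[2, 1] = x.vec[3] / sqrt2
    return x.data
end

sym_lmo = FrankWolfe.SubspaceLMO(MatrixSignLMO(), deflate, inflate)
d = deflate([1.0 -2.0; -2.0 3.0], sym_lmo)      # a symmetric direction, deflated
v = FrankWolfe.compute_extreme_point(sym_lmo, d) # inflates d, calls the inner LMO, deflates the vertex

Second, the pattern that patches 17-21 exercise three times over in examples/optimal_experiment_design.jl: build an objective with a restricted domain, then run BPCG and DICG with the keyword form `FrankWolfe.Secant(domain_oracle=...)` that replaced the positional `Secant(40, 1e-8, domain_oracle)`. The matrix `A`, the D-criterion objective `f`, `grad!`, and `domain_oracle` below are simplified stand-ins for the `build_*` helpers in that file; the solver call signatures are the ones appearing in the diffs.

using FrankWolfe
using LinearAlgebra
using Random

Random.seed!(42)
m, n = 50, 10
A = randn(m, n)

# D-criterion: f(x) = -logdet(X(x)) is only defined where
# X(x) = A' * Diagonal(x) * A is positive definite, hence the domain
# oracle that the Secant line search uses to reject trial step sizes
# leaving the domain.
X(x) = Symmetric(A' * Diagonal(x) * A)
f(x) = -logdet(X(x))
function grad!(storage, x)
    Xinv = inv(X(x))
    @inbounds for i in 1:m
        # partial derivative w.r.t. x_i is -a_i' X(x)^{-1} a_i
        storage[i] = -dot(A[i, :], Xinv, A[i, :])
    end
    return storage
end
domain_oracle(x) = isposdef(X(x))

lmo = FrankWolfe.ProbabilitySimplexOracle(1.0)

# Start from a uniform mixture of n simplex vertices; the corresponding
# rows of A are (generically) linearly independent, so X(x0) is positive
# definite and x0 lies in the domain of f.
atoms = [FrankWolfe.ScaledHotVector(1.0, i, m) for i in 1:n]
x0 = zeros(m)
x0[1:n] .= 1 / n
active_set = FrankWolfe.ActiveSet(fill(1 / n, n), atoms, copy(x0))

# BPCG on the active set, Secant line search guarded by the domain oracle
x_s, _, _, _, traj_s, _ = FrankWolfe.blended_pairwise_conditional_gradient(
    f, grad!, lmo, active_set,
    line_search=FrankWolfe.Secant(domain_oracle=domain_oracle),
    trajectory=true,
)

# DICG from the same starting point and line search; it takes the iterate
# x0 directly instead of an active set
x_d, _, _, _, traj_d = FrankWolfe.decomposition_invariant_conditional_gradient(
    f, grad!, lmo, copy(x0),
    line_search=FrankWolfe.Secant(domain_oracle=domain_oracle),
    trajectory=true,
)

println("BPCG objective: ", f(x_s), "  DICG objective: ", f(x_d))

Both runs should reach approximately the same objective value, which is what the `isapprox` assertions in the patched test file check across BPCG, DICG, and BCG.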