diff --git a/examples/birkhoff.jl b/examples/birkhoff.jl index 655ef7ccf..7e29ec4ba 100644 --- a/examples/birkhoff.jl +++ b/examples/birkhoff.jl @@ -76,12 +76,12 @@ function build_birkhoff_lmo() # doubly stochastic constraints MOI.add_constraint.( o, - vec(sum(X[i], dims=1, init=MOI.ScalarAffineFunction{Float64}([], 0.0))), + X[i] * ones(n), MOI.EqualTo(1.0), ) MOI.add_constraint.( o, - vec(sum(X[i], dims=2, init=MOI.ScalarAffineFunction{Float64}([], 0.0))), + X[i]' * ones(n), MOI.EqualTo(1.0), ) # 0 ≤ Y_i ≤ X_i diff --git a/examples/optimal_experiment_design.jl b/examples/optimal_experiment_design.jl index 198991a7f..d9078d0c5 100644 --- a/examples/optimal_experiment_design.jl +++ b/examples/optimal_experiment_design.jl @@ -40,88 +40,114 @@ include("oed_utils.jl") https://github.com/ZIB-IOL/FrankWolfe.jl/blob/master/examples/optimal_experiment_design.jl """ -m = 40 + +m = 50 verbose = true ## A-Optimal Design Problem - @testset "A-Optimal Design" begin Ex_mat, n, N, ub = build_data(m) - # sharpness constants - σ = minimum(Ex_mat' * Ex_mat) - λ_max = maximum(ub) * maximum([norm(Ex_mat[i,:])^2 for i=1:size(Ex_mat,1)]) - θ = 1/2 - M = sqrt(λ_max^3/ n * σ^4) - - - g, grad! = build_a_criterion(Ex_mat, build_safe=true) + g, grad! = build_a_criterion(Ex_mat, build_safe=false) blmo = build_blmo(m, N, ub) - x0, active_set = build_start_point(Ex_mat, N, ub) - z = greedy_incumbent(Ex_mat, N, ub) - domain_oracle = build_domain_oracle(Ex_mat, n) - line_search = FrankWolfe.MonotonicGenericStepsize(FrankWolfe.Adaptive(), domain_oracle) heu = Boscia.Heuristic(Boscia.rounding_hyperplane_heuristic, 0.7, :hyperplane_aware_rounding) + domain_oracle = build_domain_oracle(Ex_mat, n) - x, _, result = Boscia.solve(g, grad!, blmo, active_set=active_set, start_solution=z, time_limit=10, verbose=false, domain_oracle=domain_oracle, custom_heuristics=[heu], sharpness_exponent=θ, sharpness_constant=M, line_search=line_search) - - _, active_set = build_start_point(Ex_mat, N, ub) + # precompile + line_search = FrankWolfe.MonotonicGenericStepsize(FrankWolfe.Adaptive(), domain_oracle) + x0, active_set = build_start_point(Ex_mat, N, ub) z = greedy_incumbent(Ex_mat, N, ub) - x, _, result = Boscia.solve(g, grad!, blmo, active_set=active_set, start_solution=z, verbose=verbose, domain_oracle=domain_oracle, custom_heuristics=[heu], sharpness_exponent=θ, sharpness_constant=M, line_search=line_search) #sharpness_exponent=θ, sharpness_constant=M, - - gradient = similar(x) - grad!(gradient, x) - v = Boscia.compute_extreme_point(blmo, gradient) - dual_gap = FrankWolfe.dot(gradient, x) - FrankWolfe.dot(gradient, v) - @show dual_gap + x, _, _ = Boscia.solve(g, grad!, blmo, active_set=active_set, start_solution=z, time_limit=10, verbose=false, domain_oracle=domain_oracle, custom_heuristics=[heu], line_search=line_search) - blmo = build_blmo(m, N, ub) - _, active_set = build_start_point(Ex_mat, N, ub) + # proper run with MGLS and Adaptive + line_search = FrankWolfe.MonotonicGenericStepsize(FrankWolfe.Adaptive(), domain_oracle) + x0, active_set = build_start_point(Ex_mat, N, ub) z = greedy_incumbent(Ex_mat, N, ub) - domain_oracle = build_domain_oracle(Ex_mat, n) - heu = Boscia.Heuristic(Boscia.rounding_hyperplane_heuristic, 0.7, :hyperplane_aware_rounding) - - x_s, _, result = Boscia.solve(g, grad!, blmo, active_set=active_set, start_solution=z, verbose=verbose, line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), domain_oracle=domain_oracle, sharpness_exponent=θ, sharpness_constant=M, custom_heuristics=[heu]) #sharpness_exponent=θ, sharpness_constant=M, - - @show x - @show x_s - @show g(x), g(x_s) - @test isapprox(g(x), g(x_s), atol=1e-3, rtol=5e-2) + x, _, result = Boscia.solve( + g, + grad!, + blmo, + active_set=active_set, + start_solution=z, + verbose=verbose, + domain_oracle=domain_oracle, + custom_heuristics=[heu], + line_search=line_search, + ) + + # Run with Secant + x0, active_set = build_start_point(Ex_mat, N, ub) + z = greedy_incumbent(Ex_mat, N, ub) + line_search = FrankWolfe.Secant(domain_oracle=domain_oracle) + + x_s, _, result_s = Boscia.solve( + g, + grad!, + blmo, + active_set=active_set, + start_solution=z, + verbose=verbose, + domain_oracle=domain_oracle, + custom_heuristics=[heu], + line_search=line_search, + ) + + @test result_s[:dual_bound] <= g(x) + 1e-4 + @test result[:dual_bound] <= g(x_s) + 1e-4 + @test isapprox(g(x), g(x_s), atol=1e-3) end ## D-Optimal Design Problem - @testset "D-optimal Design" begin Ex_mat, n, N, ub = build_data(m) - # sharpness constants - σ = minimum(Ex_mat' * Ex_mat) - λ_max = maximum(ub) * maximum([norm(Ex_mat[i,:])^2 for i=1:size(Ex_mat,1)]) - θ = 1/2 - M = sqrt(2 * λ_max^2/ n * σ^4 ) - - - g, grad! = build_d_criterion(Ex_mat, build_safe=true) + g, grad! = build_d_criterion(Ex_mat, build_safe=false) blmo = build_blmo(m, N, ub) - x0, active_set = build_start_point(Ex_mat, N, ub) - z = greedy_incumbent(Ex_mat, N, ub) - domain_oracle = build_domain_oracle(Ex_mat, n) heu = Boscia.Heuristic(Boscia.rounding_hyperplane_heuristic, 0.7, :hyperplane_aware_rounding) + domain_oracle = build_domain_oracle(Ex_mat, n) - x, _, result = Boscia.solve(g, grad!, blmo, active_set=active_set, start_solution=z, verbose=verbose, domain_oracle=domain_oracle, sharpness_exponent=θ, sharpness_constant=M, custom_heuristics=[heu]) - - blmo = build_blmo(m, N, ub) + # precompile + line_search = FrankWolfe.MonotonicGenericStepsize(FrankWolfe.Adaptive(), domain_oracle) x0, active_set = build_start_point(Ex_mat, N, ub) z = greedy_incumbent(Ex_mat, N, ub) - domain_oracle = build_domain_oracle(Ex_mat, n) - heu = Boscia.Heuristic(Boscia.rounding_hyperplane_heuristic, 0.7, :hyperplane_aware_rounding) + x, _, _ = Boscia.solve(g, grad!, blmo, active_set=active_set, start_solution=z, time_limit=10, verbose=false, domain_oracle=domain_oracle, custom_heuristics=[heu], line_search=line_search) - x_s, _, result = Boscia.solve(g, grad!, blmo, active_set=active_set, start_solution=z, verbose=verbose, line_search=FrankWolfe.Secant(domain_oracle=domain_oracle), domain_oracle=domain_oracle, sharpness_exponent=θ, sharpness_constant=M, custom_heuristics=[heu]) - - @show x - @show x_s - @show g(x), g(x_s) - @test isapprox(g(x), g(x_s), atol=1e-4, rtol=1e-2) + # proper run with MGLS and Adaptive + line_search = FrankWolfe.MonotonicGenericStepsize(FrankWolfe.Adaptive(), domain_oracle) + x0, active_set = build_start_point(Ex_mat, N, ub) + z = greedy_incumbent(Ex_mat, N, ub) + x, _, result = Boscia.solve( + g, + grad!, + blmo, + active_set=active_set, + start_solution=z, + verbose=verbose, + domain_oracle=domain_oracle, + custom_heuristics=[heu], + line_search=line_search, + ) + + # Run with Secant + x0, active_set = build_start_point(Ex_mat, N, ub) + z = greedy_incumbent(Ex_mat, N, ub) + line_search = FrankWolfe.Secant(domain_oracle=domain_oracle) + + x_s, _, result_s = Boscia.solve( + g, + grad!, + blmo, + active_set=active_set, + start_solution=z, + verbose=verbose, + domain_oracle=domain_oracle, + custom_heuristics=[heu], + line_search=line_search, + ) + + @test result_s[:dual_bound] <= g(x) + @test result[:dual_bound] <= g(x_s) + @test isapprox(g(x), g(x_s), rtol=1e-2) end diff --git a/src/node.jl b/src/node.jl index 7b1144f33..1a201904e 100644 --- a/src/node.jl +++ b/src/node.jl @@ -283,7 +283,7 @@ function Bonobo.evaluate_node!(tree::Bonobo.BnBTree, node::FrankWolfeNode) elseif node.id == 1 @debug "Lower bound of root node: $(lower_bound)" @debug "Current incumbent: $(tree.incumbent)" - @assert lower_bound <= tree.incumbent + node.fw_dual_gap_limit "lower_bound <= tree.incumbent + node.fw_dual_gap_limit : $(lower_bound) <= $(tree.incumbent + node.fw_dual_gap_limit)" + @assert lower_bound <= tree.incumbent + dual_gap "lower_bound <= tree.incumbent + dual_gap : $(lower_bound) <= $(tree.incumbent + dual_gap)" end # Call heuristic diff --git a/src/tightenings.jl b/src/tightenings.jl index 2cde12a4f..eb1510371 100644 --- a/src/tightenings.jl +++ b/src/tightenings.jl @@ -218,7 +218,13 @@ function tightening_lowerbound(tree, node, x, lower_bound) @debug "Using sharpness θ=$θ and M=$M" fx = tree.root.problem.f(x) + if node.dual_gap < 0.0 + @assert abs(node.dual_gap) > eps() "node dual gap is negative: $(node.dual_gap)" + node.dual_gap = 0.0 + end + sharpness_bound = M^(- 1 / θ) * 1/ 2 * (sqrt(bound_improvement) - M / 2 * node.dual_gap^θ)^(1 / θ) + fx - node.dual_gap + @debug "Sharpness: $lower_bound -> $sharpness_bound" @assert num_fractional == 0 || sharpness_bound >= lower_bound "$(num_fractional) == 0 || $(sharpness_bound) > $(lower_bound)" end @@ -283,10 +289,6 @@ function prune_children(tree, node, lower_bound_base, x, vidx) @debug "prune right, from $(node.lb) -> $new_bound_right, ub $(tree.incumbent), lb $(node.lb)" prune_right = true end - @assert !( - (new_bound_left > tree.incumbent + tree.root.options[:dual_gap]) && - (new_bound_right > tree.incumbent + tree.root.options[:dual_gap]) - ) end end diff --git a/src/utilities.jl b/src/utilities.jl index 814b6430a..d56efdb13 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -229,6 +229,42 @@ function sparse_min_via_enum(f, n, k, values=fill(0:1, n)) return best_val, best_sol end +function min_via_enum_simplex(f, n, N, values=fill(0:1,n)) + solutions = Iterators.product(values...) + best_val = Inf + best_sol = nothing + for sol in solutions + sol_vec = collect(sol) + if sum(sol_vec) > N + continue + end + val = f(sol_vec) + if best_val > val + best_val = val + best_sol = sol_vec + end + end + return best_val, best_sol +end + +function min_via_enum_prob_simplex(f, n, N, values=fill(0:1,n)) + solutions = Iterators.product(values...) + best_val = Inf + best_sol = nothing + for sol in solutions + sol_vec = collect(sol) + if sum(sol_vec) != N + continue + end + val = f(sol_vec) + if best_val > val + best_val = val + best_sol = sol_vec + end + end + return best_val, best_sol +end + # utility function to print the values of the parameters _value_to_print(::Bonobo.BestFirstSearch) = "Move best bound" diff --git a/test/strong_convexity_and_sharpness.jl b/test/strong_convexity_and_sharpness.jl new file mode 100644 index 000000000..0837a2d5b --- /dev/null +++ b/test/strong_convexity_and_sharpness.jl @@ -0,0 +1,213 @@ +using Boscia +using Test +using Random +using LinearAlgebra +using FrankWolfe + +## Log barrier +# min_x - ∑ log(xi + ϵ) - log(N - ∑ xi + ϵ) +# s.t. x ∈ {0,1}^n +# Strong convexity: μ = 1 / (1 + ϵ)^2n +# Sharpness: M = sqrt(2/μ), θ = 1/2 + +## General convex quadratic +# min_x 1/2 x' * Q * x + b' * x +# s.t. ∑ x = N +# x ∈ {0,1}^n +# Strong convexity: μ = minimum(eigvals(Q)) +# Sharpness: M = sqrt(2/μ), θ = 1/2 + +@testset "Strong convexity" begin + + @testset "Log barrier" begin + n = 50 + N = Int(floor(3/4 * n)) + ϵ = 1e-3 + + function f(x) + return - sum(log(xi + ϵ) for xi in x) - log(N - sum(x) + ϵ) + end + + function grad!(storage, x) + storage .= - 1 ./ (x .+ ϵ) .- 1/sum(N - sum(x) + ϵ) + return storage + + end + + int_vars = collect(1:n) + sblmo = Boscia.UnitSimplexSimpleBLMO(N) + line_search = FrankWolfe.Adaptive() + + x, _, result = Boscia.solve( + f, + grad!, + sblmo, + fill(0.0, n), + fill(floor(N/2), n), + int_vars, + n, + verbose=true, + line_search=line_search, + time_limit=120, + print_iter=1000, + ) + + μ = 1/(1 + ϵ)^(2*n) + x_sc, _, result_sc = Boscia.solve( + f, + grad!, + sblmo, + fill(0.0, n), + fill(floor(N/2), n), + int_vars, + n, + verbose=true, + line_search=line_search, + strong_convexity=μ, + time_limit=120, + print_iter=1000, + ) + + @test f(x_sc) <= f(x) + 1e-6 + @test result_sc[:dual_bound] > result[:dual_bound] + end + + @testset "General convex quadratic" begin + n = 20 + N = Int(floor(n/2)) + Q = rand(n, n) + Q = Q' * Q + @assert isposdef(Q) + + b = rand(n) + + function f(x) + return 1/2 * x' * Q * x - b' * x + end + + function grad!(storage, x) + storage .= Q * x - b + return storage + end + + val, sol = Boscia.min_via_enum_prob_simplex(f, n, N) + + blmo = Boscia.ProbabilitySimplexSimpleBLMO(N) + μ = minimum(eigvals(Q)) + + x, _, _ = Boscia.solve( + f, + grad!, + blmo, + fill(0.0, n), + fill(1.0, n), + collect(1:n), + n, + strong_convexity=μ, + verbose=true, + ) + + @test isapprox(f(x), f(sol), atol=1e-5, rtol=1e-2) + @test sum(x .== sol) == n + end +end + +@testset "Sharpness" begin + + @testset "Log barrier" begin + n = 50 + N = Int(floor(3/4 * n)) + ϵ = 1e-3 + + function f(x) + return - sum(log(xi + ϵ) for xi in x) - log(N - sum(x) + ϵ) + end + + function grad!(storage, x) + storage .= - 1 ./ (x .+ ϵ) .- 1/sum(N - sum(x) + ϵ) + return storage + + end + + int_vars = collect(1:n) + sblmo = Boscia.UnitSimplexSimpleBLMO(N) + line_search = FrankWolfe.Adaptive() + + x, _, result = Boscia.solve( + f, + grad!, + sblmo, + fill(0.0, n), + fill(floor(N/2), n), + int_vars, + n, + verbose=true, + line_search=line_search, + time_limit=120, + print_iter=1000, + ) + + μ = 1/(1 + ϵ)^(2*n) + θ = 1/2 + M = sqrt(2/μ) + x_sc, _, result_sc = Boscia.solve( + f, + grad!, + sblmo, + fill(0.0, n), + fill(floor(N/2), n), + int_vars, + n, + verbose=true, + line_search=line_search, + sharpness_constant=M, + sharpness_exponent=θ, + time_limit=120, + print_iter=1000, + ) + + @test f(x_sc) <= f(x) + 1e-6 + @test result_sc[:dual_bound] >= result[:dual_bound] + end + + @testset "General convex quadratic" begin + n = 20 + N = Int(floor(n/2)) + Q = rand(n, n) + Q = Q' * Q + @assert isposdef(Q) + + b = rand(n) + + function f(x) + return 1/2 * x' * Q * x - b' * x + end + + function grad!(storage, x) + storage .= Q * x - b + return storage + end + val, sol = Boscia.min_via_enum_prob_simplex(f, n, N) + + blmo = Boscia.ProbabilitySimplexSimpleBLMO(N) + μ = minimum(eigvals(Q)) + θ = 1/2 + M = sqrt(2/μ) + + x, _, _ = Boscia.solve( + f, + grad!, + blmo, + fill(0.0, n), + fill(1.0, n), + collect(1:n), + n, + sharpness_constant=M, + sharpness_exponent=θ, + verbose=true, + ) + + @test isapprox(f(x), f(sol), atol=1e-5, rtol=1e-2) + @test sum( x .== sol) == n + end +end \ No newline at end of file diff --git a/test/time_limit.jl b/test/time_limit.jl index d3996f968..a42e86fea 100644 --- a/test/time_limit.jl +++ b/test/time_limit.jl @@ -58,7 +58,7 @@ time_limit = 30.0 start_time = Dates.now() x, _, result = Boscia.solve(f, grad!, lmo, verbose=true, time_limit=time_limit) time_taken = float(Dates.value(Dates.now() - start_time)) / 1000 - @test sum(ai' * x) <= bi + 1e-6 + @test sum(ai' * x) <= bi + 1e-3 @test f(x) <= f(result[:raw_solution]) + 1e-6 @test result[:total_time_in_sec] <= time_limit + 5 end