Benchmark of Frank-Wolfe variants for sparse logistic regression

Comparison of different Frank-Wolfe variants on various problems with a logistic regression loss (copt.utils.LogLoss()) and an L1 ball constraint (copt.utils.L1Ball()).

Traceback (most recent call last):
  File "/home/pedregosa/dev/sphinx-gallery/sphinx_gallery/gen_rst.py", line 435, in _memory_usage
    multiprocess=True)
  File "/home/pedregosa/dev/memory_profiler/memory_profiler.py", line 343, in memory_usage
    returned = f(*args, **kw)
  File "/home/pedregosa/dev/sphinx-gallery/sphinx_gallery/gen_rst.py", line 426, in __call__
    exec(self.code, self.globals)
  File "/home/pedregosa/dev/copt/examples/frank_wolfe/plot_sparse_benchmark.py", line 32, in <module>
    X, y = load_data()
  File "/home/pedregosa/dev/copt/copt/datasets.py", line 155, in load_madelon
    return _load_dataset("madelon", subset, data_dir)
  File "/home/pedregosa/dev/copt/copt/datasets.py", line 54, in _load_dataset
    makedirs(dataset_dir)
  File "/home/pedregosa/anaconda3/lib/python3.7/os.py", line 221, in makedirs
    mkdir(name, mode)
FileExistsError: [Errno 17] File exists: '/home/pedregosa/copt_data/madelon'
import matplotlib.pyplot as plt
import numpy as np
import copt as cp

# .. benchmark problems, one entry (and one figure) per dataset ..
# Each entry is (plot title, loader returning (X, y), value handed to
# cp.utils.L1Ball -- presumably the radius of the L1 ball; confirm in copt).
datasets = [
    ("Gisette", cp.datasets.load_gisette, 6000.0),
    ("RCV1", cp.datasets.load_rcv1, 20000.0),
    ("Madelon", cp.datasets.load_madelon, 20.0),
    ("Covtype", cp.datasets.load_covtype, 200.0),
]


# .. Frank-Wolfe variants to compare ..
# Each entry is (value for the `step` argument of cp.minimize_frank_wolfe,
# legend label, matplotlib marker).
variants_fw = [
    ("backtracking", "adaptive step-size", "s"),
    ("DR", "Lipschitz step-size", "<"),
]

# One figure per dataset: every Frank-Wolfe variant is run on the same
# problem and its objective value is plotted against elapsed time.
for dataset_title, load_data, alpha in datasets:
    plt.figure()
    print("Running on the %s dataset" % dataset_title)

    X, y = load_data()
    n_samples, n_features = X.shape

    # Problem shared by all variants: logistic loss with an L1-ball
    # constraint, with every run started from the zero vector.
    f = cp.utils.LogLoss(X, y)
    l1_ball = cp.utils.L1Ball(alpha)
    x0 = np.zeros(n_features)

    for step, label, marker in variants_fw:
        # A fresh trace per variant so the recorded curves do not mix.
        trace = cp.utils.Trace(f)
        result = cp.minimize_frank_wolfe(
            f.f_grad,
            x0,
            l1_ball.lmo,
            callback=trace,
            step=step,
            lipschitz=f.lipschitz,
        )

        plt.plot(
            trace.trace_time,
            trace.trace_fx,
            label=label,
            marker=marker,
            markevery=10,
        )

    # NOTE: `result` and `trace` still refer to the last variant run above,
    # so the reported figure and the x-axis limit below are based on that
    # run only. The printed value is the fraction of coefficients whose
    # magnitude exceeds 1e-8.
    nonzero_fraction = np.mean(np.abs(result.x) > 1e-8)
    print("Sparsity of solution: %s" % nonzero_fraction)

    plt.title(dataset_title)
    plt.xlabel("Time (in seconds)")
    plt.ylabel("Objective function")
    plt.legend()
    plt.tight_layout()  # keep labels and title inside the figure area
    # for aesthetics: show only the first 70% of the last run's time span
    plt.xlim((0, 0.7 * trace.trace_time[-1]))
    plt.grid()
    plt.show()

Total running time of the script: (11 minutes 46.105 seconds)

Estimated memory usage: 8 MB

Gallery generated by Sphinx-Gallery