Source code for pyunlocbox.acceleration

# -*- coding: utf-8 -*-

r"""
The :mod:`pyunlocbox.acceleration` module implements acceleration schemes for
use with the solvers of the :mod:`pyunlocbox.solvers` module. Pass an
acceleration object as an argument to your chosen solver during its
initialization so that the solver can use it.

Interface
---------

The :class:`accel` base class defines a common interface to all acceleration
schemes:

.. autosummary::

    accel.pre
    accel.update_step
    accel.update_sol
    accel.post

Acceleration schemes
--------------------

Derived classes then implement various common acceleration schemes.

.. autosummary::

    dummy
    backtracking
    fista
    fista_backtracking
    regularized_nonlinear

"""

import copy
import logging
import warnings

import numpy as np
from scipy.optimize.linesearch import line_search_armijo

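
# The usage pattern, common to all the schemes below, is to instantiate an
# acceleration object and hand it to a solver at construction time. A minimal
# sketch (`f1`, `f2`, and `x0` are placeholders; see the doctests in each
# class below for complete, runnable examples):
#
#     >>> from pyunlocbox import functions, solvers, acceleration
#     >>> accel = acceleration.fista()
#     >>> solver = solvers.forward_backward(accel=accel, step=0.5)
#     >>> ret = solvers.solve([f1, f2], x0, solver)
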

class accel(object):
    r"""
    Defines the acceleration scheme object interface.

    This class defines the interface of an acceleration scheme object
    intended to be passed to a solver inheriting from
    :class:`pyunlocbox.solvers.solver`. It is intended to be a base class for
    standard acceleration schemes which will implement the required methods.
    It can also be instantiated by user code and dynamically modified for
    rapid testing. This class also defines the generic attributes of all
    acceleration scheme objects.

    """

    def __init__(self):
        pass

    def pre(self, functions, x0):
        """
        Pre-processing specific to the acceleration scheme.

        Gets called when :func:`pyunlocbox.solvers.solve` starts running.
        """
        self._pre(functions, x0)

    def _pre(self, functions, x0):
        raise NotImplementedError("Class user should define this method.")

    def update_step(self, solver, objective, niter):
        """
        Update the step size for the next iteration.

        Parameters
        ----------
        solver : pyunlocbox.solvers.solver
            Solver on which to act.
        objective : list of floats
            List of evaluations of the objective function since the beginning
            of the iterative process.
        niter : int
            Current iteration number.

        Returns
        -------
        float
            Updated step size.
        """
        return self._update_step(solver, objective, niter)

    def _update_step(self, solver, objective, niter):
        raise NotImplementedError("Class user should define this method.")

    def update_sol(self, solver, objective, niter):
        """
        Update the solution point for the next iteration.

        Parameters
        ----------
        solver : pyunlocbox.solvers.solver
            Solver on which to act.
        objective : list of floats
            List of evaluations of the objective function since the beginning
            of the iterative process.
        niter : int
            Current iteration number.

        Returns
        -------
        array_like
            Updated solution point.
        """
        return self._update_sol(solver, objective, niter)

    def _update_sol(self, solver, objective, niter):
        raise NotImplementedError("Class user should define this method.")

    def post(self):
        """
        Post-processing specific to the acceleration scheme.

        Mainly used to delete references added during initialization so that
        the garbage collector can free the memory. Gets called when
        :func:`pyunlocbox.solvers.solve` finishes running.
        """
        self._post()

    def _post(self):
        raise NotImplementedError("Class user should define this method.")
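
# As the docstring above notes, `accel` can be instantiated by user code and
# dynamically modified for rapid testing. A minimal sketch (the constant step
# size and pass-through update are illustrative choices, not library
# defaults):
#
#     >>> a = accel()
#     >>> a._pre = lambda functions, x0: None
#     >>> a._update_step = lambda solver, objective, niter: 0.1
#     >>> a._update_sol = lambda solver, objective, niter: solver.sol
#     >>> a._post = lambda: None
#     >>> # `a` now satisfies the interface and can be passed to a solver.
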
class dummy(accel):
    r"""
    Dummy acceleration scheme which does nothing.

    Used by default in most of the solvers. It simply returns the step size
    and solution point it receives, unaltered.

    """

    def _pre(self, functions, x0):
        pass

    def _update_step(self, solver, objective, niter):
        return solver.step

    def _update_sol(self, solver, objective, niter):
        return solver.sol

    def _post(self):
        pass
# -----------------------------------------------------------------------------
# Stepsize optimizers
# -----------------------------------------------------------------------------
class backtracking(dummy):
    r"""
    Backtracking based on a local quadratic approximation of the smooth part
    of the objective.

    Parameters
    ----------
    eta : float
        A number between 0 and 1 representing the ratio of the geometric
        sequence formed by successive step sizes. In other words, it
        establishes the relation `step_new = eta * step_old`.
        Default is 0.5.

    Notes
    -----
    This is the backtracking strategy used in the original FISTA paper,
    :cite:`beck2009FISTA`.

    Examples
    --------
    >>> import numpy as np
    >>> from pyunlocbox import functions, solvers, acceleration
    >>> y = [4, 5, 6, 7]
    >>> x0 = np.zeros(len(y))
    >>> f1 = functions.norm_l1(y=y, lambda_=1.0)
    >>> f2 = functions.norm_l2(y=y, lambda_=0.8)
    >>> accel = acceleration.backtracking()
    >>> solver = solvers.forward_backward(accel=accel, step=10)
    >>> ret = solvers.solve([f1, f2], x0, solver, atol=1e-32, rtol=None)
    Solution found after 4 iterations:
        objective function f(sol) = 0.000000e+00
        stopping criterion: ATOL
    >>> ret['sol']
    array([ 4., 5., 6., 7.])

    """

    def __init__(self, eta=0.5, **kwargs):
        if (eta > 1) or (eta <= 0):
            raise ValueError("eta must be between 0 and 1.")
        self.eta = eta
        super(backtracking, self).__init__(**kwargs)

    def _update_step(self, solver, objective, niter):
        """
        Notes
        -----
        TODO: For now we're recomputing gradients in order to evaluate the
        backtracking criterion. In the future, it might be interesting to
        think of some design changes so that this function has access to the
        gradients directly.

        Since the call to `solver._algo()` modifies the internal state of the
        solver itself, we need to store the solver's property values before
        doing backtracking, and then restore the solver's state after
        backtracking is done. This takes more memory, but it's the only way
        to guarantee that backtracking is performed on a fixed solver state.
        """
        # Save the current state of the solver.
        properties = copy.deepcopy(vars(solver))
        logging.debug('(Begin) solver properties: {}'.format(properties))

        # Initialize some useful variables.
        fn = 0
        grad = np.zeros(properties['sol'].shape)
        for f in solver.smooth_funs:
            fn += f.eval(properties['sol'])
            grad += f.grad(properties['sol'])
        step = properties['step']
        logging.debug('fn = {}'.format(fn))

        while True:
            # Run the solver with the current step size.
            solver.step = step
            logging.debug('Current step: {}'.format(step))
            solver._algo()
            logging.debug(
                '(During) solver properties: {}'.format(vars(solver)))

            # Record results.
            fp = np.sum([f.eval(solver.sol) for f in solver.smooth_funs])
            logging.debug('fp = {}'.format(fp))
            dot_prod = np.dot(solver.sol - properties['sol'], grad)
            logging.debug('dot_prod = {}'.format(dot_prod))
            norm_diff = np.sum((solver.sol - properties['sol'])**2)
            logging.debug('norm_diff = {}'.format(norm_diff))

            # Restore the previous state of the solver.
            for key, val in properties.items():
                setattr(solver, key, copy.copy(val))
            logging.debug(
                '(Reset) solver properties: {}'.format(vars(solver)))

            if (2. * step * (fp - fn - dot_prod) <= norm_diff):
                logging.debug('Break condition reached')
                break
            else:
                logging.debug('Decreasing step')
                step *= self.eta

        return step
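
# For reference, the backtracking loop above accepts a step as soon as the
# smooth part satisfies the local quadratic bound
# f(x+) <= f(x) + <grad f(x), x+ - x> + ||x+ - x||^2 / (2 * step).
# A standalone sketch of that test (a hypothetical helper, not used by the
# class; its arguments mirror the local variables of `_update_step()`):

def _sufficient_decrease_sketch(step, fn, fp, dot_prod, norm_diff):
    """Return True if a candidate step passes the backtracking criterion.

    `fn` and `fp` are the smooth objective at the old and new points,
    `dot_prod` is <x+ - x, grad f(x)>, and `norm_diff` is ||x+ - x||^2.
    """
    return 2. * step * (fp - fn - dot_prod) <= norm_diff
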
# -----------------------------------------------------------------------------
# Solution point optimizers
# -----------------------------------------------------------------------------
class fista(dummy):
    r"""
    Acceleration scheme for forward-backward solvers.

    Notes
    -----
    This is the acceleration scheme proposed in the original FISTA paper,
    :cite:`beck2009FISTA`.

    Examples
    --------
    >>> import numpy as np
    >>> from pyunlocbox import functions, solvers, acceleration
    >>> y = [4, 5, 6, 7]
    >>> x0 = np.zeros(len(y))
    >>> f1 = functions.norm_l2(y=y)
    >>> f2 = functions.dummy()
    >>> accel = acceleration.fista()
    >>> solver = solvers.forward_backward(accel=accel, step=0.5)
    >>> ret = solvers.solve([f1, f2], x0, solver, atol=1e-5)
    Solution found after 15 iterations:
        objective function f(sol) = 4.957288e-07
        stopping criterion: ATOL
    >>> ret['sol']
    array([ 4.0002509 , 5.00031362, 6.00037635, 7.00043907])

    """

    def __init__(self, **kwargs):
        self.t = 1.
        super(fista, self).__init__(**kwargs)

    def _pre(self, functions, x0):
        self.sol = np.array(x0, copy=True)

    def _update_sol(self, solver, objective, niter):
        self.t = 1. if (niter == 1) else self.t  # Restart variable t if needed.
        t = (1. + np.sqrt(1. + 4. * self.t**2.)) / 2.
        y = solver.sol + ((self.t - 1) / t) * (solver.sol - self.sol)
        self.t = t
        self.sol[:] = solver.sol
        return y

    def _post(self):
        del self.sol
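
# For reference, `fista._update_sol()` above implements the standard FISTA
# momentum recurrence t_new = (1 + sqrt(1 + 4 * t_old^2)) / 2 and
# y = x_new + ((t_old - 1) / t_new) * (x_new - x_old). A standalone sketch
# (a hypothetical helper, not used by the class):

def _fista_momentum_sketch(x_new, x_old, t_old):
    """Return the extrapolated point and the updated momentum variable."""
    t_new = (1. + np.sqrt(1. + 4. * t_old**2)) / 2.
    y = x_new + ((t_old - 1.) / t_new) * (x_new - x_old)
    return y, t_new
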
class regularized_nonlinear(dummy):
    r"""
    Regularized nonlinear acceleration (RNA) for gradient descent.

    Parameters
    ----------
    k : int, optional
        Number of points to keep in the buffer for computing the
        extrapolation. (Default is 10.)
    lambda_ : float or list of floats, optional
        Regularization parameter in the acceleration scheme. The user can
        pass a list of candidates, and the acceleration algorithm will pick
        the one that provides the best extrapolation. (Default is 1e-6.)
    adaptive : boolean, optional
        If adaptive = True and the user has not provided a list of
        regularization parameters, the acceleration algorithm will assemble
        a grid of possible regularization parameters based on the SVD of the
        Gram matrix of vectors of differences in the extrapolation buffer.
        If adaptive = False, the algorithm will simply try to use the
        value(s) given in `lambda_`. (Default is True.)
    dolinesearch : boolean, optional
        If dolinesearch = True, the acceleration scheme will try to return a
        point in the line segment between the current extrapolation and the
        previous one that provides a decrease in the value of the objective
        function. If dolinesearch = False, the algorithm simply returns the
        current extrapolation. (Default is True.)
    forcedecrease : boolean, optional
        If forcedecrease = True and we obtain a bad extrapolation, the
        algorithm returns the unchanged solution produced by the solver. If
        forcedecrease = False, the algorithm returns the new extrapolation
        no matter what. (Default is True.)

    Notes
    -----
    This is the acceleration scheme proposed in :cite:`scieur2016`.

    See also Damien Scieur's `repository
    <https://github.com/windows7lover/RegularizedNonlinearAcceleration>`_
    for the Matlab version that inspired this implementation.

    Examples
    --------
    >>> import numpy as np
    >>> from pyunlocbox import functions, solvers, acceleration
    >>> dim = 25
    >>> np.random.seed(0)
    >>> xstar = np.random.rand(dim)  # True solution.
    >>> x0 = np.random.rand(dim)
    >>> x0 = xstar + 5.*(x0 - xstar) / np.linalg.norm(x0 - xstar)
    >>> A = np.random.rand(dim, dim)
    >>> step = 1/np.linalg.norm(np.dot(A.T, A))
    >>> f = functions.norm_l2(lambda_=0.5, A=A, y=np.dot(A, xstar))
    >>> fd = functions.dummy()
    >>> accel = acceleration.regularized_nonlinear(k=5)
    >>> solver = solvers.gradient_descent(step=step, accel=accel)
    >>> params = {'rtol': 0, 'maxit': 200, 'verbosity': 'NONE'}
    >>> ret = solvers.solve([f, fd], x0, solver, **params)
    >>> pctdiff = 100*np.sum((xstar - ret['sol'])**2)/np.sum(xstar**2)
    >>> print('Difference: {0:.1f}%'.format(pctdiff))
    Difference: 1.3%

    """

    def __init__(self, k=10, lambda_=1e-6, adaptive=True, dolinesearch=True,
                 forcedecrease=True, **kwargs):
        self.k = k
        self.lambda_ = lambda_
        self.adaptive = adaptive
        self.dolinesearch = dolinesearch
        self.forcedecrease = forcedecrease

    @property
    def lambda_(self):
        return self._lambda_

    @lambda_.setter
    def lambda_(self, lambda_):
        try:
            self._lambda_ = [float(elem) for elem in lambda_]
        except TypeError:
            try:
                self._lambda_ = [float(lambda_)]
            except ValueError as err:
                print('lambda_ is not a number')
                raise err
        except ValueError as err:
            print('lambda_ is not a list of numbers')
            raise err

    def _pre(self, functions, x0):
        self.buffer = []
        self.functions = functions

    def _update_sol(self, solver, objective, niter):
        if (niter % (self.k + 1)) == 0:  # Extrapolate every k + 1 iterations.
            self.buffer.append(solver.sol)

            # (Normalized) matrix of differences.
            U = np.diff(self.buffer, axis=0)
            UU = np.dot(U, U.T)
            UU /= np.linalg.norm(UU)

            # If no parameter grid was provided, assemble one.
            if self.adaptive and (len(self.lambda_) <= 1):
                svals = np.sort(np.abs(np.linalg.eigvals(UU)))
                svals = np.log(svals)
                svals = 0.5 * (svals[:-1] + svals[1:])
                self.lambda_ = np.concatenate(([0.], np.exp(svals)))

            # Grid search for the best parameter for the extrapolation.
            fvals = []
            c = np.zeros((self.k,))
            extrap = np.zeros(np.shape(solver.sol))
            for lambda_ in self.lambda_:
                # Coefficients of the extrapolation.
                c[:] = np.linalg.solve(UU + lambda_ * np.eye(self.k),
                                       np.ones(self.k))
                c[:] /= np.sum(c)
                extrap[:] = np.dot(np.asarray(self.buffer[:-1]).T, c)
                fvals.append(np.sum([f.eval(extrap)
                                     for f in self.functions]))

            if self.forcedecrease and (min(fvals) > np.sum(objective[-1])):
                # If we have bad extrapolations, keep the solution as is.
                extrap[:] = solver.sol
            else:
                # Return the best extrapolation from the grid search.
                lambda_ = self.lambda_[fvals.index(min(fvals))]
                # We can afford to solve the linear system here again because
                # self.k is normally very small. Alternatively, we could have
                # kept track of the best extrapolations during the grid
                # search, but that would require at least double the memory,
                # as we'd have to store both the current extrapolation and
                # the best extrapolation.
                c[:] = np.linalg.solve(UU + lambda_ * np.eye(self.k),
                                       np.ones(self.k))
                c[:] /= np.sum(c)
                extrap[:] = np.dot(np.asarray(self.buffer[:-1]).T, c)

            # Improve the proposal with a line search.
            if self.dolinesearch:
                # Objective evaluation functional.
                def f(x):
                    return np.sum([f.eval(x) for f in self.functions])

                # Solution at the previous extrapolation.
                xk = self.buffer[0]
                # Search direction.
                pk = extrap - xk
                # Objective value at the previous extrapolation.
                old_fval = np.sum(objective[-self.k])

                a, fc, fa = line_search_armijo(f=f, xk=xk, pk=pk, gfk=-pk,
                                               old_fval=old_fval, c1=1e-4,
                                               alpha0=1.)

                # New point proposal.
                if a is None:
                    warnings.warn('Line search failed to find good step size')
                else:
                    extrap[:] = xk + a * pk

            # Clear the buffer and parameter grid for the next extrapolation.
            self.buffer = []
            self.lambda_ = [] if self.adaptive else self.lambda_

            return extrap

        else:  # Gather points for the future extrapolation.
            self.buffer.append(copy.copy(solver.sol))
            return solver.sol

    def _post(self):
        del self.buffer, self.functions
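
# For reference, the core of the RNA update above: normalize the Gram matrix
# of successive differences, solve the regularized system
# (UU + lambda * I) c = 1, normalize the weights c to sum to one, and combine
# the buffered iterates with those weights. A standalone sketch for a fixed
# regularizer (a hypothetical helper, not used by the class):

def _rna_extrapolation_sketch(buffer_, lambda_):
    """Extrapolate from a list of k + 1 iterates with a fixed `lambda_`."""
    U = np.diff(buffer_, axis=0)   # k successive differences.
    UU = np.dot(U, U.T)
    UU /= np.linalg.norm(UU)       # Normalized Gram matrix.
    k = UU.shape[0]
    c = np.linalg.solve(UU + lambda_ * np.eye(k), np.ones(k))
    c /= np.sum(c)                 # Extrapolation weights summing to one.
    return np.dot(np.asarray(buffer_[:-1]).T, c)
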
# -----------------------------------------------------------------------------
# Mixed optimizers
# -----------------------------------------------------------------------------
class fista_backtracking(backtracking, fista):
    r"""
    Acceleration scheme with backtracking for forward-backward solvers.

    Notes
    -----
    This is the acceleration scheme and backtracking strategy proposed in
    the original FISTA paper, :cite:`beck2009FISTA`.

    Examples
    --------
    >>> import numpy as np
    >>> from pyunlocbox import functions, solvers, acceleration
    >>> y = [4, 5, 6, 7]
    >>> x0 = np.zeros(len(y))
    >>> f1 = functions.norm_l2(y=y)
    >>> f2 = functions.dummy()
    >>> accel = acceleration.fista_backtracking()
    >>> solver = solvers.forward_backward(accel=accel, step=0.5)
    >>> ret = solvers.solve([f1, f2], x0, solver, atol=1e-5)
    Solution found after 15 iterations:
        objective function f(sol) = 4.957288e-07
        stopping criterion: ATOL
    >>> ret['sol']
    array([ 4.0002509 , 5.00031362, 6.00037635, 7.00043907])

    """

    def __init__(self, eta=0.5, **kwargs):
        # We can use multiple inheritance here and avoid the deadly diamond
        # of death because the classes backtracking and fista modify
        # different methods of their parent class dummy. If that were not the
        # case, the best solution would probably be to inherit from accel and
        # rewrite the `_update_step()` and `_update_sol()` methods.
        backtracking.__init__(self, eta=eta, **kwargs)
        fista.__init__(self, **kwargs)
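
# For reference, the cooperative mixing above relies on Python's method
# resolution order, which can be inspected directly (expected output shown
# below, assuming standard C3 linearization):
#
#     >>> [klass.__name__ for klass in fista_backtracking.__mro__]
#     ['fista_backtracking', 'backtracking', 'fista', 'dummy', 'accel', 'object']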