Source code for pyunlocbox.acceleration

# -*- coding: utf-8 -*-

r"""
The :mod:`pyunlocbox.acceleration` module implements acceleration schemes for
use with the :mod:`pyunlocbox.solvers`. Pass a given acceleration object as an
argument to your chosen solver during its initialization so that the solver can
use it.

Interface
---------

The :class:`accel` base class defines a common interface to all acceleration
schemes:

.. autosummary::

    accel.pre
    accel.update_step
    accel.update_sol
    accel.post

Acceleration schemes
--------------------

Then, derived classes implement various common acceleration schemes.

.. autosummary::

    dummy
    backtracking
    fista
    fista_backtracking
    regularized_nonlinear

"""

import copy
import logging
import warnings

import numpy as np
from scipy.optimize.linesearch import line_search_armijo


[docs]class accel(object):
    r"""
    Defines the acceleration scheme object interface.

    This class defines the interface of an acceleration scheme object intended
    to be passed to a solver inheriting from
    :class:`pyunlocbox.solvers.solver`. It is intended to be a base class for
    standard acceleration schemes which will implement the required methods.
    It can also be instantiated by user code and dynamically modified for
    rapid testing. This class also defines the generic attributes of all
    acceleration scheme objects.

    """

    def __init__(self):
        pass

[docs]    def pre(self, functions, x0):
        """
        Pre-processing specific to the acceleration scheme.

        Gets called when :func:`pyunlocbox.solvers.solve` starts running.
        """
        self._pre(functions, x0)

    def _pre(self, functions, x0):
        raise NotImplementedError("Class user should define this method.")

[docs]    def update_step(self, solver, objective, niter):
        """
        Update the step size for the next iteration.

        Parameters
        ----------
        solver : pyunlocbox.solvers.solver
            Solver on which to act.
        objective : list of floats
            List of evaluations of the objective function since the beginning
            of the iterative process.
        niter : int
            Current iteration number.

        Returns
        -------
        float
            Updated step size.

        """
        return self._update_step(solver, objective, niter)

    def _update_step(self, solver, objective, niter):
        raise NotImplementedError("Class user should define this method.")

[docs]    def update_sol(self, solver, objective, niter):
        """
        Update the solution point for the next iteration.

        Parameters
        ----------
        solver : pyunlocbox.solvers.solver
            Solver on which to act.
        objective : list of floats
            List of evaluations of the objective function since the beginning
            of the iterative process.
        niter : int
            Current iteration number.

        Returns
        -------
        array_like
            Updated solution point.

        """
        return self._update_sol(solver, objective, niter)

    def _update_sol(self, solver, objective, niter):
        raise NotImplementedError("Class user should define this method.")

[docs]    def post(self):
        """
        Post-processing specific to the acceleration scheme.

        Mainly used to delete references added during initialization so that
        the garbage collector can free the memory. Gets called when
        :func:`pyunlocbox.solvers.solve` finishes running.

        """
        self._post()

    def _post(self):
        raise NotImplementedError("Class user should define this method.")


[docs]class dummy(accel):
    r"""
    Dummy acceleration scheme which does nothing.

    Used by default in most of the solvers. It simply returns unaltered the
    step size and solution point it receives.

    """

    def _pre(self, functions, x0):
        pass

    def _update_step(self, solver, objective, niter):
        return solver.step

    def _update_sol(self, solver, objective, niter):
        return solver.sol

    def _post(self):
        pass


# -----------------------------------------------------------------------------
# Stepsize optimizers
# -----------------------------------------------------------------------------


[docs]class backtracking(dummy):
    r"""
    Backtracking based on a local quadratic approximation of the smooth
    part of the objective.

    Parameters
    ----------
    eta : float
        A number between 0 and 1 representing the ratio of the geometric
        sequence formed by successive step sizes. In other words, it
        establishes the relation `step_new = eta * step_old`.
        Default is 0.5.

    Notes
    -----
    This is the backtracking strategy used in the original FISTA paper,
    :cite:`beck2009FISTA`.

    Examples
    --------
    >>> import numpy as np
    >>> from pyunlocbox import functions, solvers, acceleration
    >>> y = [4, 5, 6, 7]
    >>> x0 = np.zeros(len(y))
    >>> f1 = functions.norm_l1(y=y, lambda_=1.0)
    >>> f2 = functions.norm_l2(y=y, lambda_=0.8)
    >>> accel = acceleration.backtracking()
    >>> solver = solvers.forward_backward(accel=accel, step=10)
    >>> ret = solvers.solve([f1, f2], x0, solver, atol=1e-32, rtol=None)
    Solution found after 4 iterations:
        objective function f(sol) = 0.000000e+00
        stopping criterion: ATOL
    >>> ret['sol']
    array([ 4.,  5.,  6.,  7.])

    """

    def __init__(self, eta=0.5, **kwargs):
        if (eta > 1) or (eta <= 0):
            raise ValueError("eta must be between 0 and 1.")
        self.eta = eta
        super(backtracking, self).__init__(**kwargs)

    def _update_step(self, solver, objective, niter):
        """
        Notes
        -----
        TODO: For now we're recomputing gradients in order to evaluate the
        backtracking criterion. In the future, it might be interesting to
        think of some design changes so that this function has access to the
        gradients directly.

        Since the call to `solver._algo()` modifies the internal state of the
        solver itself, we need to store the solver's property values before
        doing backtracking, and then restore the solver's state after
        backtracking is done. This takes more memory, but it's the only way to
        guarantee that backtracking is performing on a fixed solver state.
        """
        # Save current state of the solver
        properties = copy.deepcopy(vars(solver))
        logging.debug('(Begin) solver properties: {}'.format(properties))

        # Initialize some useful variables
        fn = 0
        grad = np.zeros(properties['sol'].shape)
        for f in solver.smooth_funs:
            fn += f.eval(properties['sol'])
            grad += f.grad(properties['sol'])
        step = properties['step']

        logging.debug('fn = {}'.format(fn))

        while True:
            # Run the solver with the current stepsize
            solver.step = step
            logging.debug('Current step: {}'.format(step))
            solver._algo()
            logging.debug(
                '(During) solver properties: {}'.format(vars(solver)))

            # Record results
            fp = np.sum([f.eval(solver.sol) for f in solver.smooth_funs])
            logging.debug('fp = {}'.format(fp))

            dot_prod = np.dot(solver.sol - properties['sol'], grad)
            logging.debug('dot_prod = {}'.format(dot_prod))

            norm_diff = np.sum((solver.sol - properties['sol'])**2)
            logging.debug('norm_diff = {}'.format(norm_diff))

            # Restore the previous state of the solver
            for key, val in properties.items():
                setattr(solver, key, copy.copy(val))
            logging.debug('(Reset) solver properties: {}'.format(vars(solver)))

            if (2. * step * (fp - fn - dot_prod) <= norm_diff):
                logging.debug('Break condition reached')
                break
            else:
                logging.debug('Decreasing step')
                step *= self.eta

        return step


# -----------------------------------------------------------------------------
# Solution point optimizers
# -----------------------------------------------------------------------------


[docs]class fista(dummy):
    r"""
    Acceleration scheme for forward-backward solvers.

    Notes
    -----
    This is the acceleration scheme proposed in the original FISTA paper,
    :cite:`beck2009FISTA`.

    Examples
    --------
    >>> import numpy as np
    >>> from pyunlocbox import functions, solvers, acceleration
    >>> y = [4, 5, 6, 7]
    >>> x0 = np.zeros(len(y))
    >>> f1 = functions.norm_l2(y=y)
    >>> f2 = functions.dummy()
    >>> accel=acceleration.fista()
    >>> solver = solvers.forward_backward(accel=accel, step=0.5)
    >>> ret = solvers.solve([f1, f2], x0, solver, atol=1e-5)
    Solution found after 15 iterations:
        objective function f(sol) = 4.957288e-07
        stopping criterion: ATOL
    >>> ret['sol']
    array([ 4.0002509 ,  5.00031362,  6.00037635,  7.00043907])

    """

    def __init__(self, **kwargs):
        self.t = 1.
        super(fista, self).__init__(**kwargs)

    def _pre(self, functions, x0):
        self.sol = np.array(x0, copy=True)

    def _update_sol(self, solver, objective, niter):
        self.t = 1. if (niter == 1) else self.t  # Restart variable t if needed
        t = (1. + np.sqrt(1. + 4. * self.t**2.)) / 2.
        y = solver.sol + ((self.t - 1) / t) * (solver.sol - self.sol)
        self.t = t
        self.sol[:] = solver.sol
        return y

    def _post(self):
        del self.sol


[docs]class regularized_nonlinear(dummy):
    r"""
    Regularized nonlinear acceleration (RNA) for gradient descent.

    Parameters
    ----------
    k : int, optional
        Number of points to keep in the buffer for computing the extrapolation.
        (Default is 10.)
    lambda_ : float or list of floats, optional
        Regularization parameter in the acceleration scheme. The user can pass
        a list of candidates, and the acceleration algorithm will pick the one
        that provides the best extrapolation.
        (Default is 1e-6.)
    adaptive : boolean, optional
        If adaptive = True and the user has not provided a list of
        regularization parameters, the acceleration algorithm will assemble a
        grid of possible regularization parameters based on the SVD of the
        Gram matrix of vectors of differences in the extrapolation buffer.
        If adaptive = False, the algorithm will simply try to use the value(s)
        given in `lambda_`.
        (Default is True.)
    dolinesearch : boolean, optional
        If dolinesearch = True, the acceleration scheme will try to return a
        point in the line segment between the current extrapolation and the
        previous one that provides a decrease in the value of the objective
        function.
        If dolinesearch = False, the algorithm simply returns the current
        extrapolation.
        (Default is True.)
    forcedecrease : boolean, optional
        If forcedecrese = True and we obtain a bad extrapolation, the
        algorithm returns the unchanged solution produced by the solver.
        If forcedecrease = False, the algorithm returns the new extrapolation
        no matter what.
        (Default is True.)

    Notes
    -----
    This is the acceleration scheme proposed in :cite:`scieur2016`.

    See also Damien Scieur's `repository <https://github.com/windows7lover
    /RegularizedNonlinearAcceleration>`_ for the Matlab version that inspired
    this implementation.

    Examples
    --------
    >>> import numpy as np
    >>> from pyunlocbox import functions, solvers, acceleration
    >>> dim = 25;
    >>> np.random.seed(0)
    >>> xstar = np.random.rand(dim) # True solution
    >>> x0 = np.random.rand(dim)
    >>> x0 = xstar + 5.*(x0 - xstar) / np.linalg.norm(x0 - xstar)
    >>> A = np.random.rand(dim, dim)
    >>> step = 1/np.linalg.norm(np.dot(A.T, A))
    >>> f = functions.norm_l2(lambda_=0.5, A=A, y=np.dot(A, xstar))
    >>> fd = functions.dummy()
    >>> accel = acceleration.regularized_nonlinear(k=5)
    >>> solver = solvers.gradient_descent(step=step, accel=accel)
    >>> params = {'rtol':0, 'maxit':200, 'verbosity':'NONE'}
    >>> ret = solvers.solve([f, fd], x0, solver, **params)
    >>> pctdiff = 100*np.sum((xstar - ret['sol'])**2)/np.sum(xstar**2)
    >>> print('Difference: {0:.1f}%'.format(pctdiff))
    Difference: 1.3%

    """

    def __init__(self, k=10, lambda_=1e-6, adaptive=True, dolinesearch=True,
                 forcedecrease=True, **kwargs):
        self.k = k
        self.lambda_ = lambda_
        self.adaptive = adaptive
        self.dolinesearch = dolinesearch
        self.forcedecrease = forcedecrease

    @property
    def lambda_(self):
        return self._lambda_

    @lambda_.setter
    def lambda_(self, lambda_):
        try:
            self._lambda_ = [float(elem) for elem in lambda_]
        except TypeError:
            try:
                self._lambda_ = [float(lambda_)]
            except ValueError as err:
                print('lambda_ is not a number')
                raise err
        except ValueError as err:
            print('lambda_ is not a list of numbers')
            raise err

    def _pre(self, functions, x0):
        self.buffer = []
        self.functions = functions

    def _update_sol(self, solver, objective, niter):

        if (niter % (self.k + 1)) == 0:  # Extrapolate at each k iterations

            self.buffer.append(solver.sol)

            # (Normalized) matrix of differences
            U = np.diff(self.buffer, axis=0)
            UU = np.dot(U, U.T)
            UU /= np.linalg.norm(UU)

            # If no parameter grid was provided, assemble one.
            if self.adaptive and (len(self.lambda_) <= 1):
                svals = np.sort(np.abs(np.linalg.eigvals(UU)))
                svals = np.log(svals)
                svals = 0.5 * (svals[:-1] + svals[1:])
                self.lambda_ = np.concatenate(([0.], np.exp(svals)))

            # Grid search for the best parameter for the extrapolation
            fvals = []
            c = np.zeros((self.k,))
            extrap = np.zeros(np.shape(solver.sol))

            for lambda_ in self.lambda_:
                # Coefficients of the extrapolation
                c[:] = np.linalg.solve(UU + lambda_ * np.eye(self.k),
                                       np.ones(self.k))
                c[:] /= np.sum(c)

                extrap[:] = np.dot(np.asarray(self.buffer[:-1]).T, c)

                fvals.append(np.sum([f.eval(extrap) for f in self.functions]))

            if self.forcedecrease and (min(fvals) > np.sum(objective[-1])):
                # If we have bad extrapolations, keep solution as is
                extrap[:] = solver.sol
            else:
                # Return the best extrapolation from the grid search
                lambda_ = self.lambda_[fvals.index(min(fvals))]

                # We can afford to solve the linear system here again because
                # self.k is normally very small. Alternatively, we could have
                # kept track of the best extrapolations during the grid search,
                # but that would require at least double the memory, as we'd
                # have to store both the current extrapolation and the best
                # extrapolation.
                c[:] = np.linalg.solve(UU + lambda_ * np.eye(self.k),
                                       np.ones(self.k))
                c[:] /= np.sum(c)
                extrap[:] = np.dot(np.asarray(self.buffer[:-1]).T, c)

            # Improve proposal with line search
            if self.dolinesearch:
                # Objective evaluation functional
                def f(x):
                    return np.sum([f.eval(x) for f in self.functions])
                # Solution at previous extrapolation
                xk = self.buffer[0]
                # Search direction
                pk = extrap - xk
                # Objective value during the previous extrapolation
                old_fval = np.sum(objective[-self.k])

                a, fc, fa = line_search_armijo(f=f,
                                               xk=xk,
                                               pk=pk,
                                               gfk=-pk,
                                               old_fval=old_fval,
                                               c1=1e-4,
                                               alpha0=1.)

                # New point proposal
                if a is None:
                    warnings.warn('Line search failed to find good step size')
                else:
                    extrap[:] = xk + a * pk

            # Clear buffer and parameter grid for next extrapolation process
            self.buffer = []
            self.lambda_ = [] if self.adaptive else self.lambda_

            return extrap

        else:  # Gather points for future extrapolation
            self.buffer.append(copy.copy(solver.sol))
            return solver.sol

    def _post(self):
        del self.buffer, self.functions

# -----------------------------------------------------------------------------
# Mixed optimizers
# -----------------------------------------------------------------------------


[docs]class fista_backtracking(backtracking, fista):
    r"""
    Acceleration scheme with backtracking for forward-backward solvers.

    Notes
    -----
    This is the acceleration scheme and backtracking strategy proposed in the
    original FISTA paper, :cite:`beck2009FISTA`.

    Examples
    --------
    >>> import numpy as np
    >>> from pyunlocbox import functions, solvers, acceleration
    >>> y = [4, 5, 6, 7]
    >>> x0 = np.zeros(len(y))
    >>> f1 = functions.norm_l2(y=y)
    >>> f2 = functions.dummy()
    >>> accel=acceleration.fista_backtracking()
    >>> solver = solvers.forward_backward(accel=accel, step=0.5)
    >>> ret = solvers.solve([f1, f2], x0, solver, atol=1e-5)
    Solution found after 15 iterations:
        objective function f(sol) = 4.957288e-07
        stopping criterion: ATOL
    >>> ret['sol']
    array([ 4.0002509 ,  5.00031362,  6.00037635,  7.00043907])

    """

    def __init__(self, eta=0.5, **kwargs):
        # I can do multiple inheritance here and avoid the deadly diamond of
        # death because the classes backtracking and fista modify different
        # methods of their parent class dummy. If that would not be the case, I
        # guess the best solution would be to inherit from accel and rewrite
        # the `_update_step()` and `_update_sol()` methods.

        backtracking.__init__(self, eta=eta, **kwargs)
        fista.__init__(self, **kwargs)