Source code for modopt.core.optimization_algorithms.steepest_descent

import numpy as np
import time

from modopt import Optimizer
from modopt.line_search_algorithms import ScipyLS, BacktrackingArmijo, Minpack2LS

[docs]class SteepestDescent(Optimizer):
    """
    Gradient-descent or steepest-descent algorithm for unconstrained optimization.

    Parameters
    ----------
    problem : Problem or ProblemLite
        Object containing the problem to be solved.
    recording : bool, default=False
        If ``True``, record all outputs from the optimization.
        This needs to be enabled for hot-starting the same problem later,
        if the optimization is interrupted.
    out_dir : str, optional
        The directory to store all the output files generated from the optimization.
    hot_start_from : str, optional
        The record file from which to hot-start the optimization.
    hot_start_atol : float, default=0.
        The absolute tolerance check for the inputs 
        when reusing outputs from the hot-start record.
    hot_start_rtol : float, default=0.
        The relative tolerance check for the inputs 
        when reusing outputs from the hot-start record.
    visualize : list, default=[]
        The list of scalar variables to visualize during the optimization.
    keep_viz_open : bool, default=False
        If ``True``, keep the visualization window open after the optimization is complete.
    turn_off_outputs : bool, default=False
        If ``True``, prevent modOpt from generating any output files.

    maxiter : int, default=500
        Maximum number of iterations.
    opt_tol : float, default=1e-5
        Optimality tolerance.
        Certifies convergence when the 2-norm of the gradient is less than this value.
    ls_type : {None, 'backtracking-armijo', 'derivative-based-strong-wolfe'}, default='derivative-based-strong-wolfe'
        Type of line search to use.
    ls_min_step : float, default=1e-14
        Minimum step size for the line search.
    ls_max_step : float, default=1.
        Maximum step size for the line search.
    ls_maxiter : int, default=10
        Maximum number of iterations for the line search.
    ls_alpha_tol : float, default=1e-14
        Relative tolerance for an acceptable step in the line search.
    ls_gamma_c : float, default=0.3
        Step length contraction factor when backtracking.
    ls_eta_a : float, default=1e-4
        Armijo (sufficient decrease condition) parameter for the line search.
    ls_eta_w : float, default=0.9
        Wolfe (curvature condition) parameter for the line search.

    readable_outputs : list, default=[]
        List of outputs to be written to readable text output files.
        Available outputs are: 'itr', 'obj', 'x', 'opt', 'time', 'nfev', 'ngev', 'step'.
    """
    def initialize(self):
        self.solver_name = 'steepest_descent'

        self.obj = self.problem._compute_objective
        self.grad = self.problem._compute_objective_gradient

        self.options.declare('maxiter', types=int, default=500)
        self.options.declare('opt_tol', types=float, default=1e-5)

        self.options.declare('ls_type',
                             default='derivative-based-strong-wolfe',
                             values=[None,
                                     'backtracking-armijo',
                                     'derivative-based-strong-wolfe'])
        self.options.declare('ls_min_step', default=1e-14, types=float)
        self.options.declare('ls_max_step', default=1.0, types=float)
        self.options.declare('ls_maxiter', default=10, types=int)
        self.options.declare('ls_alpha_tol', default=1e-14, types=float)
        self.options.declare('ls_gamma_c', default=0.3, types=float)
        self.options.declare('ls_eta_a', default=1e-4, types=float)
        self.options.declare('ls_eta_w', default=0.9, types=float)

        self.options.declare('readable_outputs', types=list, default=[])

        self.available_outputs = {
            'itr': int,
            'obj': float,
            # for arrays from each iteration, shapes need to be declared
            'x': (float, (self.problem.nx, )),
            'opt': float,
            'time': float,
            'nfev': int,
            'ngev': int,
            'step': float,
        }

    def setup(self):
        # self.LS = ScipyLS(f=self.obj, g=self.grad, max_step=50.)

        if self.options['ls_type'] == 'backtracking-armijo':
            self.LS = BacktrackingArmijo(f=self.obj,
                                         g=self.grad,
                                         eta_a=self.options['ls_eta_a'],
                                         gamma_c=self.options['ls_gamma_c'],
                                         max_step=self.options['ls_max_step'],
                                         maxiter=self.options['ls_maxiter'])
        elif self.options['ls_type'] == 'derivative-based-strong-wolfe':
            self.LS = Minpack2LS(f=self.obj,
                                 g=self.grad,
                                 min_step=self.options['ls_min_step'],
                                 max_step=self.options['ls_max_step'],
                                 maxiter=self.options['ls_maxiter'],
                                 alpha_tol=self.options['ls_alpha_tol'],
                                 eta_a=self.options['ls_eta_a'],
                                 eta_w=self.options['ls_eta_w'])

[docs]    def solve(self):

        # Assign shorter names to variables and methods
        nx = self.problem.nx
        x0 = self.problem.x0
        opt_tol = self.options['opt_tol']
        maxiter = self.options['maxiter']

        obj = self.obj
        grad = self.grad

        start_time = time.time()

        # Set initial values for current iterates
        x_k = x0 * 1.
        f_k = obj(x_k)
        g_k = grad(x_k)

        # Iteration counter
        itr = 0

        opt = np.linalg.norm(g_k)
        nfev = 1
        ngev = 1

        # Initializing declared outputs
        self.update_outputs(itr=0,
                            x=x_k,
                            obj=f_k,
                            opt=opt,
                            time=time.time() - start_time,
                            nfev=nfev,
                            ngev=ngev,
                            step=0.)

        while (opt > opt_tol and itr < maxiter):
            itr_start = time.time()
            itr += 1

            # ALGORITHM STARTS HERE
            # >>>>>>>>>>>>>>>>>>>>>

            # Compute the search direction toward the next iterate as the negative of the gradient
            p_k = -g_k

            # Compute the step length along the search direction via a line search
            if self.options['ls_type'] == 'backtracking-armijo':
                alpha, f_k, new_f_evals, new_g_evals, converged = self.LS.search(
                    x=x_k, p=p_k, f0=f_k, g0=g_k)

                g_k = grad(x_k + alpha * p_k)
                new_g_evals += 1

            elif self.options['ls_type'] == 'derivative-based-strong-wolfe':
                alpha, f_k, g_k, slope_new, new_f_evals, new_g_evals, converged = self.LS.search(
                    x=x_k, p=p_k, f0=f_k, g0=g_k)

            else:
                alpha, f_k, g_k, new_f_evals, new_g_evals, converged = (
                    1., obj(x_k + p_k), grad(x_k + p_k), 1, 1, True
                    )

            # Update the number of function and gradient evaluations
            nfev += new_f_evals
            ngev += new_g_evals

            # A step of length 1e-4 is taken along p_k if line search does not converge
            if not converged:
                alpha = 1e-4
                x_k += p_k * alpha
                f_k = obj(x_k)
                g_k = grad(x_k)
                nfev += 1
                ngev += 1

            else:
                x_k += alpha * p_k
                if not isinstance(g_k, np.ndarray):
                    if g_k == 'Unavailable':
                        g_k = grad(x_k)
                        ngev += 1

            opt = np.linalg.norm(g_k)

            # <<<<<<<<<<<<<<<<<<<
            # ALGORITHM ENDS HERE

            # Update arrays inside outputs dict with new values from the current iteration
            self.update_outputs(itr=itr,
                                x=x_k,
                                obj=f_k,
                                opt=opt,
                                time=time.time() - start_time,
                                nfev=nfev,
                                ngev=ngev,
                                step=alpha)

        self.total_time = time.time() - start_time
        converged = opt <= opt_tol

        self.results = {
            'x': x_k, 
            'objective': f_k, 
            'optimality': opt, 
            'nfev': nfev, 
            'ngev': ngev,
            'niter': itr, 
            'time': self.total_time,
            'converged': converged,
            }
        
        # Run post-processing for the Optimizer() base class
        self.run_post_processing()
        
        return self.results