// Copyright (C) 2008 Davis E. King (davisking@users.sourceforge.net) // License: Boost Software License See LICENSE.txt for the full license. #undef DLIB_OPTIMIZATIOn_ABSTRACT_ #ifdef DLIB_OPTIMIZATIOn_ABSTRACT_ #include <cmath> #include <limits> #include "../matrix/matrix_abstract.h" #include "../algs.h" namespace dlib { // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- // Functions that transform other functions // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- template < typename funct > class central_differences; /*! This is a function object that represents the derivative of some other function. Note that if funct is a function of a double then the derivative of funct is just a double but if funct is a function of a dlib::matrix (i.e. a function of many variables) then its derivative is a gradient vector. !*/ template < typename funct > const central_differences<funct> derivative( const funct& f, double eps ); /*! requires - f == a function that returns a scalar - f must take either double or a dlib::matrix that is a column vector - eps > 0 ensures - returns a function that represents the derivative of the function f. It is approximated numerically by: (f(x+eps)-f(x-eps))/(2*eps) !*/ template < typename funct > const central_differences<funct> derivative( const funct& f ); /*! ensures - returns derivative(f, 1e-7) !*/ // ---------------------------------------------------------------------------------------- template < typename funct, typename T > class line_search_funct; /*! This object is a function object that represents a line search function. It represents a function with the signature: double l(double x) !*/ template < typename funct, typename T > const line_search_funct<funct,T> make_line_search_function ( const funct& f, const T& start, const T& direction ); /*! requires - is_matrix<T>::value == true (i.e. T must be a dlib::matrix) - f must take a dlib::matrix that is a column vector - start.nc() == 1 - direction.nc() == 1 (i.e. start and direction should be column vectors) - f must return either a double or a column vector the same length and type as start - f(start + 1.5*direction) should be a valid expression ensures - if (f returns a double) then - returns a line search function that computes l(x) == f(start + x*direction) - else - returns a line search function that computes l(x) == trans(f(start + x*direction))*direction - We assume that f is the derivative of some other function and that what f returns is a gradient vector. So the following two expressions both create the derivative of l(x): - derivative(make_line_search_function(funct,start,direction)) - make_line_search_function(derivative(funct),start,direction) !*/ // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- // Functions that perform unconstrained optimization // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- inline double poly_min_extrap ( double f0, double d0, double f1, double d1 ); /*! ensures - let c(x) be a 3rd degree polynomial such that: - c(0) == f0 - c(1) == f1 - derivative of c(x) at x==0 is d0 - derivative of c(x) at x==1 is d1 - returns the point in the range [0,1] that minimizes the polynomial c(x) !*/ // ---------------------------------------------------------------------------------------- template < typename funct, typename funct_der > double line_search ( const funct& f, const funct_der& der, double rho, double sigma, double minf, double& f0_out ); /*! requires - 1 > sigma > rho > 0 - f and der are scalar functions of scalars (e.g. line_search_funct objects) - der is the derivative of f ensures - returns a value alpha such that f(alpha) is significantly closer to the minimum of f than f(0). - bigger values of sigma result in a less accurate but faster line search - f0_out == f(0) !*/ // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- template < typename funct, typename funct_der, typename T > void find_min_quasi_newton ( const funct& f, const funct_der& der, T& x, double min_f, double min_delta = 1e-7 ); /*! requires - min_delta >= 0 - f(x) must be a valid expression that evaluates to a double - der(x) must be a valid expression that evaluates to the derivative of f() at x. - is_matrix<T>::value == true (i.e. T must be a dlib::matrix type) - x.nc() == 1 (i.e. x must be a column vector) ensures - Performs an unconstrained minimization of the function f() using the BFGS quasi newton method. The optimization stops when any of the following conditions are satisfied: - the change in f() from one iteration to the next is less than min_delta - f(#x) <= min_f - #x == the value of x that was found to minimize f() !*/ // ---------------------------------------------------------------------------------------- template < typename funct, typename T > void find_min_quasi_newton2 ( const funct& f, T& x, double min_f, double min_delta = 1e-7, const double derivative_eps = 1e-7 ); /*! requires - min_delta >= 0 - derivative_eps > 0 - f(x) must be a valid expression that evaluates to a double - is_matrix<T>::value == true (i.e. T must be a dlib::matrix type) - x.nc() == 1 (i.e. x must be a column vector) ensures - Performs an unconstrained minimization of the function f() using a quasi newton method. The optimization stops when any of the following conditions are satisfied: - the change in f() from one iteration to the next is less than min_delta - f(#x) <= min_f - Uses the dlib::derivative(f,derivative_eps) function to compute gradient information - #x == the value of x that was found to minimize f() !*/ // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- template < typename funct, typename funct_der, typename T > void find_min_conjugate_gradient ( const funct& f, const funct_der& der, T& x, double min_f, double min_delta = 1e-7 ); /*! requires - min_delta >= 0 - f(x) must be a valid expression that evaluates to a double - der(x) must be a valid expression that evaluates to the derivative of f() at x. - is_matrix<T>::value == true (i.e. T must be a dlib::matrix type) - x.nc() == 1 (i.e. x must be a column vector) ensures - Performs an unconstrained minimization of the function f() using a conjugate gradient method. The optimization stops when any of the following conditions are satisfied: - the change in f() from one iteration to the next is less than min_delta - f(#x) <= min_f - #x == the value of x that was found to minimize f() !*/ // ---------------------------------------------------------------------------------------- template < typename funct, typename T > void find_min_conjugate_gradient2 ( const funct& f, T& x, double min_f, double min_delta = 1e-7, const double derivative_eps = 1e-7 ); /*! requires - min_delta >= 0 - derivative_eps > 0 - f(x) must be a valid expression that evaluates to a double - der(x) must be a valid expression that evaluates to the derivative of f() at x. - is_matrix<T>::value == true (i.e. T must be a dlib::matrix type) - x.nc() == 1 (i.e. x must be a column vector) ensures - Performs an unconstrained minimization of the function f() using a conjugate gradient method. The optimization stops when any of the following conditions are satisfied: - the change in f() from one iteration to the next is less than min_delta - f(#x) <= min_f - Uses the dlib::derivative(f,derivative_eps) function to compute gradient information - #x == the value of x that was found to minimize f() !*/ // ---------------------------------------------------------------------------------------- } #endif // DLIB_OPTIMIZATIOn_ABSTRACT_