Source code for estimagic.optimization.algo_options

import numpy as np

"""
The order is the following:

1. Convergence and Stopping Criteria
2. Other Common Tuning Parameters for Optimization Algorithms

   - Trust Region Parameters
   - Numerical Algorithm Group Tuning Parameters

"""
"""
=====================================================================================
1. Stopping Criteria
=====================================================================================
"""

CONVERGENCE_RELATIVE_CRITERION_TOLERANCE = 2e-9
"""float: Stop when the relative improvement between two iterations is below this.

    The exact definition of relative improvement depends on the optimizer and should
    be documented there. To disable it, set it to 0.

    The default value is inspired by scipy L-BFGS-B defaults, but rounded.

"""

CONVERGENCE_ABSOLUTE_CRITERION_TOLERANCE = 0
"""float: Stop when the absolute improvement between two iterations is below this.

    Disabled by default because it is very problem specific.

"""

CONVERGENCE_ABSOLUTE_GRADIENT_TOLERANCE = 1e-5
"""float: Stop when the gradient is smaller than this.

    For some algorithms this criterion refers to all entries, for others to some norm.

    For bound constrained optimizers this typically refers to a projected gradient.
    The exact definition should be documented for each optimizer.

    The default is the same as scipy. To disable it, set it to zero.

"""

CONVERGENCE_RELATIVE_GRADIENT_TOLERANCE = 1e-8
"""float: Stop when the gradient, divided by the absolute value of the criterion
    function is smaller than this. For some algorithms this criterion refers to
    all entries, for others to some norm. For bound constrained optimizers this
    typically refers to a projected gradient. The exact definition should be documented
    for each optimizer. To disable it, set it to zero.

"""

CONVERGENCE_SCALED_GRADIENT_TOLERANCE = 1e-8
"""float: Stop when all entries (or for some algorithms the norm) of the gradient,
    divided by the norm of the gradient at start parameters is smaller than this.
    For bound constrained optimizers this typically refers to a projected gradient.
    The exact definition should be documented for each optimizer.
    To disable it, set it to zero.

"""

CONVERGENCE_RELATIVE_PARAMS_TOLERANCE = 1e-5
"""float: Stop when the relative change in parameters is smaller than this.
    The exact definition of relative change and whether this refers to the maximum
    change or the average change depends on the algorithm and should be documented
    there. To disable it, set it to zero. The default is the same as in scipy.

"""

CONVERGENCE_ABSOLUTE_PARAMS_TOLERANCE = 0
"""float: Stop when the absolute change in parameters between two iterations is smaller
    than this. Whether this refers to the maximum change or the average change depends
    on the algorithm and should be documented there.

    Disabled by default because it is very problem specific. To enable it, set it to a
    value larger than zero.

"""

CONVERGENCE_NOISE_CORRECTED_CRITERION_TOLERANCE = 1.0
"""float: Stop when the evaluations on the set of interpolation points all fall within
    this factor of the noise level. The default is 1, i.e. when all evaluations are
    within the noise level. If you want to not use this criterion but still flag your
    criterion function as noisy, set this tolerance to 0.0.

    .. warning::
        Very small values, as used for most other tolerances, don't make sense here.

"""

CONVERGENCE_MINIMAL_TRUSTREGION_RADIUS_TOLERANCE = 1e-8
"""float: Stop when the lower trust region radius falls below this value."""


STOPPING_MAX_CRITERION_EVALUATIONS = 1_000_000
"""int:
    If the maximum number of function evaluations is reached, the optimization stops
    but we do not count this as successful convergence. The function evaluations used
    to evaluate a numerical gradient do not count for this.

"""


STOPPING_MAX_CRITERION_EVALUATIONS_GLOBAL = 1_000
"""int:
    If the maximum number of function evaluations is reached, the optimization stops
    but we do not count this as successful convergence. The function evaluations used
    to evaluate a numerical gradient do not count for this. Set to a lower number than
    STOPPING_MAX_CRITERION_EVALUATIONS for global optimizers.

"""


STOPPING_MAX_ITERATIONS = 1_000_000
"""int:
    If the maximum number of iterations is reached, the
    optimization stops, but we do not count this as successful convergence.
    The difference to ``max_criterion_evaluations`` is that one iteration might
    need several criterion evaluations, for example in a line search or to determine
    if the trust region radius has to be shrunk.

"""

CONVERGENCE_SECOND_BEST_ABSOLUTE_CRITERION_TOLERANCE = 1e-08
"""float: The absolute criterion tolerance estimagic requires if no other stopping
criterion apart from max iterations etc. is available. This is taken from scipy
(SLSQP's value, smaller than Nelder-Mead).

"""

CONVERGENCE_SECOND_BEST_ABSOLUTE_PARAMS_TOLERANCE = 1e-08
"""float: The absolute parameter tolerance estimagic requires if no other stopping
criterion apart from max iterations etc. is available. This is taken from pybobyqa.

"""

# Default specification of the slow-progress termination (or reset) rule. The
# meaning of every key is described in the string that follows this assignment.
CONVERGENCE_SLOW_PROGRESS = {
    # Required average improvement per iteration over the comparison period.
    "threshold_to_characterize_as_slow": 1e-8,
    # None is a placeholder: the documented default is ``20 * len(x)``, so it has
    # to be filled in wherever the parameter vector is known.
    "max_insufficient_improvements": None,
    # Number of iterations to look back when computing the improvement.
    "comparison_period": 5,
}
"""dict: Specification of when to terminate or reset the optimization because of only
    slow improvements. This is similar to an absolute criterion tolerance only that
    instead of a single improvement the average over several iterations must be small.

    Possible entries are:
        threshold_to_characterize_as_slow (float): Threshold whether an improvement
            is insufficient. Note: the improvement is divided by the
            ``comparison_period``.
            So this is the required average improvement per iteration over the
            comparison period.
        max_insufficient_improvements (int): Number of consecutive
            insufficient improvements before termination (or reset). Default is
            ``20 * len(x)``.
        comparison_period (int):
            How many iterations to go back to calculate the improvement.
            For example 5 would mean that each criterion evaluation is compared to the
            criterion value from 5 iterations before.

"""

"""
=====================================================================================
2. Other Common Tuning Parameters for Optimization Algorithms
=====================================================================================
"""

MAX_LINE_SEARCH_STEPS = 20
"""int: Inspired by scipy L-BFGS-B."""

LIMITED_MEMORY_STORAGE_LENGTH = 10
"""int: Taken from scipy L-BFGS-B."""

THRESHOLD_FOR_SAFETY_STEP = 0.5
r"""float: Threshold for when to call the safety step (:math:`\gamma_s`).

    :math:`\text{proposed step} \leq \text{threshold_for_safety_step} \cdot
    \text{current_lower_trustregion_radius}`.

"""

CONSTRAINTS_ABSOLUTE_TOLERANCE = 1e-5
"""float: Allowed tolerance of the equality and inequality constraints for values to be
considered 'feasible'.

"""

"""
-------------------------
Trust Region Parameters
-------------------------
"""

TRUSTREGION_THRESHOLD_SUCCESSFUL = 0.1
"""float: Share of the predicted improvement that has to be achieved for a trust
    region iteration to count as successful.

"""

TRUSTREGION_THRESHOLD_VERY_SUCCESSFUL = 0.7
"""float: Share of predicted improvement that has to be achieved for a trust region
    iteration to count as very successful.

"""

TRUSTREGION_SHRINKING_FACTOR_NOT_SUCCESSFUL = None
"""float: Ratio by which to shrink the upper trust region radius when realized
    improvement does not match the ``threshold_successful``. The default is 0.98
    if the criterion is noisy and 0.5 else.

"""

TRUSTREGION_EXPANSION_FACTOR_SUCCESSFUL = 2.0
r"""float: Ratio by which to expand the upper trust region radius :math:`\Delta_k`
    in very successful iterations (:math:`\gamma_{inc}` in the notation of the paper).

"""

TRUSTREGION_EXPANSION_FACTOR_VERY_SUCCESSFUL = 4.0
r"""float: Ratio of the proposed step (:math:`\|s_k\|`) by which to expand the upper
    trust region radius (:math:`\Delta_k`) in very successful iterations
    (:math:`\overline{\gamma}_{inc}` in the notation of the paper).

"""

TRUSTREGION_SHRINKING_FACTOR_LOWER_RADIUS = None
r"""float: Ratio by which to shrink the lower trust region radius (:math:`\rho_k`)
    (:math:`\alpha_1` in the notation of the paper). Default is 0.9 if
    the criterion is noisy and 0.1 else.

"""

TRUSTREGION_SHRINKING_FACTOR_UPPER_RADIUS = None
r"""float: Ratio of the current lower trust region (:math:`\rho_k`) by which to shrink
    the upper trust region radius (:math:`\Delta_k`) when the lower one is shrunk
    (:math:`\alpha_2` in the notation of the paper). Default is 0.95 if the
    criterion is noisy and 0.5 else."""

"""
---------------------------------------------
Numerical Algorithm Group Tuning Parameters
---------------------------------------------
"""

INITIAL_DIRECTIONS = "coordinate"
"""string: How to draw the initial directions. Possible values are "coordinate" for
    coordinate directions (the default) or "random".

"""

RANDOM_DIRECTIONS_ORTHOGONAL = True
"""bool: Whether to make randomly drawn initial directions orthogonal."""


INTERPOLATION_ROUNDING_ERROR = 0.1
r"""float: Internally, all the NAG algorithms store interpolation points with respect
    to a base point :math:`x_b`; that is, we store :math:`\{y_t-x_b\}`,
    which reduces the risk of roundoff errors. We shift :math:`x_b` to :math:`x_k` when
    :math:`\text{proposed step} \leq \text{interpolation_rounding_error} \cdot
    \|x_k-x_b\|`.

"""

CLIP_CRITERION_IF_OVERFLOWING = True
"""bool: Whether to clip the criterion to avoid ``OverflowError``."""


TRUSTREGION_PRECONDITION_INTERPOLATION = True
"""bool: whether to scale the interpolation linear system to improve conditioning."""


# Default options for resetting the optimization. The meaning of every key is
# described in the string that follows this assignment. Entries set to None are
# placeholders whose documented defaults depend on other settings (for example
# whether the criterion is noisy or whether a global optimum is sought).
RESET_OPTIONS = {
    "use_resets": None,
    "minimal_trustregion_radius_tolerance_scaling_at_reset": 1.0,
    "reset_type": "soft",
    "move_center_at_soft_reset": True,
    "reuse_criterion_value_at_hard_reset": True,
    "max_iterations_without_new_best_after_soft_reset": None,
    "auto_detect": True,
    "auto_detect_history": 30,
    "auto_detect_min_jacobian_increase": 0.015,
    "auto_detect_min_correlations": 0.1,
    "points_to_replace_at_soft_reset": 3,
    "max_consecutive_unsuccessful_resets": 10,
    # Only used when using nag_bobyqa.
    "max_unsuccessful_resets": None,
    "trust_region_scaling_at_unsuccessful_reset": None,
    # Only used when using nag_dfols.
    "max_interpolation_points": None,
    "n_extra_interpolation_points_per_soft_reset": 0,
    "n_extra_interpolation_points_per_hard_reset": 0,
    "n_additional_extra_points_to_replace_per_reset": 0,
}
r"""dict: Options for resetting the optimization.

    Possible entries are:

        use_resets (bool): Whether to do resets when the lower trust
            region radius (:math:`\rho_k`) reaches the stopping criterion
            (:math:`\rho_{end}`), or (optionally) when all interpolation points are
            within noise level. Default is ``True`` if the criterion is noisy.
        minimal_trustregion_radius_tolerance_scaling_at_reset (float): Factor with
            which the trust region stopping criterion is multiplied at each reset.

        reset_type (str): Whether to use "soft" or "hard" resets. Default is "soft".

        move_center_at_soft_reset (bool): Whether to move the trust region center
            (:math:`x_k`) to the best new point evaluated instead of keeping it
            constant.
        points_to_replace_at_soft_reset (int): Number of interpolation points to move
            at each soft reset.
        reuse_criterion_value_at_hard_reset (bool): Whether or not to recycle the
            criterion value at the best iterate found when performing a hard reset.
            This saves one criterion evaluation.
        max_iterations_without_new_best_after_soft_reset (int):
            The maximum number of successful steps in a given run where the new
            criterion value is worse than the best value found in previous runs before
            terminating. Default is ``max_criterion_evaluations``.
        auto_detect (bool): Whether or not to
            automatically determine when to reset. This is an additional condition
            and resets can still be triggered by small upper trust region radius, etc.
            There are two criteria used: upper trust region radius shrinkage
            (no increases over the history, more decreases than no changes) and
            changes in the model Jacobian (consistently increasing trend as measured
            by slope and correlation coefficient of the line of best fit).
        auto_detect_history (int):
            How many iterations of model changes and trust region radii to store.
        auto_detect_min_jacobian_increase (float):
            Minimum rate of increase of the Jacobian over past iterations to cause a
            reset.
        auto_detect_min_correlations (float):
            Minimum correlation of the Jacobian data set required to cause a reset.
        max_consecutive_unsuccessful_resets (int): maximum number of consecutive
            unsuccessful resets allowed (i.e. resets which did not outperform the
            best known value from earlier runs).

    Only used when using nag_bobyqa:

        max_unsuccessful_resets (int):
            number of total unsuccessful resets allowed.
            Default is 20 if ``seek_global_optimum`` and else unrestricted.
        trust_region_scaling_at_unsuccessful_reset (float): Factor by which to
            expand the initial lower trust region radius (:math:`\rho_{beg}`) after
            unsuccessful resets. Default is 1.1 if ``seek_global_optimum`` else 1.

    Only used when using nag_dfols:

        max_interpolation_points (int): Maximum allowed value of the number of
            interpolation points. This is useful if the number of interpolation points
            increases with each reset, e.g. when
            ``n_extra_interpolation_points_per_soft_reset > 0``. The default is
            ``n_interpolation_points``.
        n_extra_interpolation_points_per_soft_reset (int): Number of points to add to
            the interpolation set with each soft reset.
        n_extra_interpolation_points_per_hard_reset (int): Number of points to add to
            the interpolation set with each hard reset.
        n_additional_extra_points_to_replace_per_reset (int): This parameter modifies
            ``n_extra_points_to_replace_successful``. With each reset
            ``n_extra_points_to_replace_successful`` is increased by this number.

"""


# Default options for the fast start phase, i.e. starting the optimization while
# the full trust region model is still being built. The meaning of every key is
# described in the string that follows this assignment.
TRUSTREGION_FAST_START_OPTIONS = {
    # NOTE(review): the key is spelled "inital", whereas the accompanying
    # documentation calls the option ``min_initial_points``. It is left unchanged
    # here because code outside this view may look it up by this exact spelling;
    # confirm before renaming.
    "min_inital_points": None,
    "method": "auto",
    # None is a placeholder: the documented default depends on ``method``.
    "scale_of_trustregion_step_perturbation": None,
    "scale_of_jacobian_components_perturbation": 1e-2,
    # The following will be growing.full_rank.min_sing_val,
    # but it is not yet supported by DF-OLS.
    "floor_of_jacobian_singular_values": 1,
    "jacobian_max_condition_number": 1e8,
    "geometry_improving_steps": False,
    "safety_steps": True,
    "shrink_upper_radius_in_safety_steps": False,
    "full_geometry_improving_step": False,
    "reset_trustregion_radius_after_fast_start": False,
    "reset_min_trustregion_radius_after_fast_start": False,
    "shrinking_factor_not_successful": None,
    "n_extra_search_directions_per_iteration": 0,
}
r"""dict: Options to start the optimization while building the full trust region model.

    To activate this, set the number of interpolation points at which to evaluate the
    criterion before doing the first step, ``min_inital_points``, to something smaller
    than the number of parameters.

    The following options can be specified:

        min_inital_points (int): Number of initial interpolation
            points in addition to the start point. This should only be changed to
            a value less than ``len(x)``, and only if the default setup cost
            of ``len(x) + 1`` evaluations of the criterion is impractical.
            If this is set to be less than the default, the input value of
            ``n_interpolation_points`` should be set to ``len(x)``.
            If the default is used, all the other parameters have no effect.
            Default is ``n_interpolation_points - 1``.
            If the default setup costs of the evaluations are very large, DF-OLS
            can start with less than ``len(x)`` interpolation points and add points
            to the trust region model with every iteration.
        method ("jacobian", "trustregion" or "auto"):
            When there are fewer interpolation points than ``len(x)`` the model is
            underdetermined. This can be fixed in two ways:
            If "jacobian", the interpolated Jacobian is perturbed to have full
            rank, allowing the trust region step to include components in the full
            search space. This is the default if
            ``len(x) \geq number of root contributions``.
            If "trustregion", the trust region step is perturbed by an
            orthogonal direction not yet searched. It is the default if
            ``len(x) < number of root contributions``.
        scale_of_trustregion_step_perturbation (float):
            When adding new search directions, the length of the step is the trust
            region radius multiplied by this value. The default is 0.1 if
            ``method == "trustregion"`` else 1.
        scale_of_jacobian_components_perturbation (float): Magnitude of extra
            components added to the Jacobian. Default is 1e-2.
        floor_of_jacobian_singular_values (float): Floor singular
            values of the Jacobian at this factor of the last non zero value.
            As of version 1.2.1 this option is not yet supported by DF-OLS!
        scale_of_jacobian_singular_value_floor (float):
            Floor singular values of the Jacobian at this factor of the last nonzero
            value.
        jacobian_max_condition_number (float): Cap on the condition number
            of Jacobian after applying floors to singular values
            (effectively another floor on the smallest singular value, since the
            largest singular value is fixed).
        geometry_improving_steps (bool): Whether to do geometry-improving steps in the
            trust region algorithm, as per the usual algorithm during the fast start.
        safety_steps (bool):
            Whether to perform safety steps.
        shrink_upper_radius_in_safety_steps (bool): During the fast start whether to
            shrink the upper trust region radius in safety steps.
        full_geometry_improving_step (bool): During the fast start whether to do a
            full geometry-improving step within safety steps (the same as the post fast
            start phase of the algorithm). Since this involves reducing the upper trust
            region radius, this can only be `True` if
            `shrink_upper_radius_in_safety_steps == False`.
        reset_trustregion_radius_after_fast_start (bool):
            Whether or not to reset the upper trust region radius to its initial value
            at the end of the fast start phase.
        reset_min_trustregion_radius_after_fast_start (bool):
            Whether or not to reset the minimum trust region radius
            (:math:`\rho_k`) to its initial value at the end of the fast start phase.
        shrinking_factor_not_successful (float):
            Ratio by which to shrink the trust region radius when realized
            improvement does not match the ``threshold_for_successful_iteration``
            during the fast start phase.  By default it is the same as
            ``reduction_when_not_successful``.
        n_extra_search_directions_per_iteration (int): Number of new search
            directions to add with each iteration where we do not have a full set
            of search directions. This approach is not recommended! Default is 0.

"""


def get_population_size(population_size, x, lower_bound=10):
    """Return the population size to use for genetic algorithms.

    Args:
        population_size (int or None): Population size requested by the user. If
            None, the default ``10 * (len(x) + 1)`` is used, but never less than
            ``lower_bound``.
        x: Sized container of parameters; only its length is used.
        lower_bound (int): Smallest admissible default population size. It is only
            applied when ``population_size`` is None; an explicitly requested
            population size is returned unchanged (converted to int).

    Returns:
        int: The population size.

    """
    if population_size is not None:
        return int(population_size)

    default_size = 10 * (len(x) + 1)
    # Clipping with an infinite upper bound only enforces the lower bound.
    return int(np.clip(default_size, lower_bound, np.inf))