Replace the `init_with_hess` constructor flag by a `hess0` argument of
`Optimizer.minimize` and bump the version to 0.8.0.

  * Hessian approximations no longer accept `init_with_hess`; `init_mat`
    becomes the private `_init_mat`, which uses the passed matrix whenever
    one is given and the identity otherwise.
  * `Optimizer.minimize` accepts `hess0`: a numpy array, the string 'hess'
    (use the Hessian evaluated at x0) or None. Any other string raises a
    ValueError instead of failing inside `_init_mat`.
  * The approximation is now (re)initialized on every `minimize` call.

NOTE(review): the post-image blob ids on the `index` lines of
fides/minimize.py and tests/test_minimize.py predate the review edits.

diff --git a/.gitignore b/.gitignore
index 1ef3a3d..e1cd519 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 .idea/*
 
 venv/*
+.venv/*
 *.pyc
 fides.egg-info/*
 .DS_Store
diff --git a/fides/hessian_approximation.py b/fides/hessian_approximation.py
index bb421fd..aeb07b9 100644
--- a/fides/hessian_approximation.py
+++ b/fides/hessian_approximation.py
@@ -17,21 +17,18 @@ class HessianApproximation:
     Abstract class from which Hessian update strategies should subclass
     """
 
-    def __init__(self, init_with_hess: bool | None = False):
+    def __init__(self):
         """
         Create a Hessian update strategy instance
-
-        :param init_with_hess:
-            Whether the hybrid update strategy should be initialized
-            according to the user-provided objective function
         """
         self._hess: np.ndarray = np.empty(0)
         self._diff: np.ndarray = np.empty(0)
-        self.init_with_hess = init_with_hess
 
-    def init_mat(self, dim: int, hess: np.ndarray | None = None) -> None:
+    def _init_mat(self, dim: int, hess: np.ndarray | None = None) -> None:
         """
-        Initializes this approximation instance and checks the dimensionality
+        Initializes this approximation instance and checks the dimensionality.
+        Note that this method is not intended to be called directly by the
+        user.
 
         :param dim:
             dimension of optimization variables
@@ -39,7 +36,7 @@ def init_mat(self, dim: int, hess: np.ndarray | None = None) -> None:
         :param hess:
             user provided initialization
         """
-        if hess is None or not self.init_with_hess:
+        if hess is None:
             self._hess = np.eye(dim)
         else:
             if hess.shape[0] != dim:
@@ -146,7 +143,6 @@ class Broyden(IterativeHessianApproximation):
     def __init__(
         self,
         phi: float,
-        init_with_hess: bool | None = False,
         enforce_curv_cond: bool | None = True,
     ):
         self.phi = phi
@@ -158,7 +154,7 @@ def __init__(
                 'preserved during updating.',
                 stacklevel=2,
             )
-        super().__init__(init_with_hess)
+        super().__init__()
 
     def _compute_update(self, s: np.ndarray, y: np.ndarray):
         self._diff = broyden_class_update(
@@ -176,12 +172,10 @@ class BFGS(Broyden):
 
     def __init__(
         self,
-        init_with_hess: bool | None = False,
         enforce_curv_cond: bool | None = True,
     ):
         super().__init__(
             phi=0.0,
-            init_with_hess=init_with_hess,
             enforce_curv_cond=enforce_curv_cond,
         )
 
@@ -196,12 +190,10 @@ class DFP(Broyden):
 
     def __init__(
         self,
-        init_with_hess: bool | None = False,
         enforce_curv_cond: bool | None = True,
     ):
         super().__init__(
             phi=1.0,
-            init_with_hess=init_with_hess,
             enforce_curv_cond=enforce_curv_cond,
         )
 
@@ -273,9 +265,9 @@ def __init__(self, happ: IterativeHessianApproximation | None = None):
         self.hessian_update = happ if happ is not None else BFGS()
         super().__init__()
 
-    def init_mat(self, dim: int, hess: np.ndarray | None = None):
-        self.hessian_update.init_mat(dim, hess)
-        super().init_mat(dim, hess)
+    def _init_mat(self, dim: int, hess: np.ndarray | None = None):
+        self.hessian_update._init_mat(dim, hess)
+        super()._init_mat(dim, hess)
 
     def requires_hess(self):
         return True  # pragma: no cover
@@ -460,12 +452,12 @@ def __init__(
                 'preserved during updating.',
                 stacklevel=2,
             )
-        super().__init__(init_with_hess=True)
+        super().__init__()
 
-    def init_mat(self, dim: int, hess: np.ndarray | None = None):
+    def _init_mat(self, dim: int, hess: np.ndarray | None = None):
         self.A = np.eye(dim) * np.spacing(1)
         self._structured_diff = np.zeros_like(self.A)
-        super().init_mat(dim, hess)
+        super()._init_mat(dim, hess)
 
     def update(
         self,
diff --git a/fides/minimize.py b/fides/minimize.py
index 208ca3c..3ae0166 100644
--- a/fides/minimize.py
+++ b/fides/minimize.py
@@ -283,8 +283,6 @@ def __init__(
         self.grad_min = self.grad
 
         self.hessian_update: HessianApproximation | None = hessian_update
-        if not self.hessian_update.get_mat().empty():
-            self.hess = self.hessian_update.get_mat()
 
         self.iterations_since_tr_update: int = 0
         self.n_intermediate_tr_radius: int = 0
@@ -312,7 +310,12 @@ def _reset(self, start_id: str | None = None):
         self.start_id = start_id
         self.history = defaultdict(list)
 
-    def minimize(self, x0: np.ndarray, start_id: str | None = None):
+    def minimize(
+        self,
+        x0: np.ndarray,
+        start_id: str | None = None,
+        hess0: np.ndarray | str | None = None,
+    ) -> tuple[float, np.ndarray, np.ndarray, np.ndarray]:
         """
         Minimize the objective function using the interior trust-region
         reflective algorithm described by [ColemanLi1994] and [ColemanLi1996]
@@ -329,7 +332,19 @@ def minimize(self, x0: np.ndarray, start_id: str | None = None):
         options[`maxtime`] on the next iteration.
 
         :param x0:
-            initial guess
+            initial guess for the optimization variables
+
+        :param start_id:
+            optional identifier for this optimization run, used for history
+            tracking
+
+        :param hess0:
+            optional initial Hessian approximation. If the string 'hess' is
+            provided, the initial Hessian from the objective function
+            evaluation at x0 is used; any other string raises a ValueError.
+            Otherwise, a numpy array of shape (n,n) must be provided, where
+            n is the number of optimization variables. If omitted, the
+            default initialization of the update strategy is used.
 
         :returns:
             fval: final function value,
@@ -349,8 +364,18 @@ def minimize(self, x0: np.ndarray, start_id: str | None = None):
         self.fval, self.grad = funout.fval, funout.grad
 
         if self.hessian_update is not None:
-            if self.hessian_update.get_mat().empty():
-                self.hessian_update.init_mat(len(self.x), funout.hess)
+            if isinstance(hess0, str):
+                if hess0 != 'hess':
+                    # fail early with a clear message instead of an
+                    # AttributeError inside _init_mat
+                    raise ValueError(
+                        "hess0 must be None, the string 'hess' or a "
+                        f'numpy array, but was {hess0!r}.'
+                    )
+                self.hessian_update._init_mat(len(self.x), funout.hess)
+            else:
+                self.hessian_update._init_mat(len(self.x), hess0)
+
             self.hess = self.hessian_update.get_mat().copy()
         else:
             self.hess = funout.hess.copy()
diff --git a/fides/version.py b/fides/version.py
index 80ae5ce..32a90a3 100644
--- a/fides/version.py
+++ b/fides/version.py
@@ -1 +1 @@
-__version__ = '0.7.9'
+__version__ = '0.8.0'
diff --git a/tests/test_hessian_approximation.py b/tests/test_hessian_approximation.py
index 4f38721..894a91b 100644
--- a/tests/test_hessian_approximation.py
+++ b/tests/test_hessian_approximation.py
@@ -5,21 +5,21 @@
 
 
 def test_wrong_dim():
-    h = BFGS(init_with_hess=True)
+    h = BFGS()
     with pytest.raises(ValueError):
-        h.init_mat(dim=3, hess=np.ones((2, 2)))
+        h._init_mat(dim=3, hess=np.ones((2, 2)))
 
     h = BFGS()
-    h.init_mat(dim=3)
+    h._init_mat(dim=3)
     with pytest.raises(ValueError):
         h.set_mat(np.ones((2, 2)))
 
 
 def test_broyden():
     h = Broyden(phi=2)
-    h.init_mat(dim=2)
+    h._init_mat(dim=2)
     h.update(np.random.random((2, 1)), np.random.random((2, 1)))
 
     h = Broyden(phi=-1)
-    h.init_mat(dim=2)
+    h._init_mat(dim=2)
     h.update(np.random.random((2,)), np.random.random((2,)))
diff --git a/tests/test_minimize.py b/tests/test_minimize.py
index 4bad457..3e39851 100644
--- a/tests/test_minimize.py
+++ b/tests/test_minimize.py
@@ -186,16 +186,10 @@ def unbounded_and_init():
         (rosengrad, Broyden(0.5)),  # 6
         (rosenboth, HybridFixed(BFGS())),  # 7
         (rosenboth, HybridFixed(SR1())),  # 8
-        (rosenboth, HybridFixed(BFGS(init_with_hess=True))),  # 9
-        (rosenboth, HybridFixed(SR1(init_with_hess=True))),  # 10
         (rosenboth, HybridFraction(BFGS())),  # 11
         (rosenboth, HybridFraction(SR1())),  # 12
-        (rosenboth, HybridFraction(BFGS(init_with_hess=True))),  # 13
-        (rosenboth, HybridFraction(SR1(init_with_hess=True))),  # 14
         (fletcher, FX(BFGS())),  # 15
         (fletcher, FX(SR1())),  # 16
-        (fletcher, FX(BFGS(init_with_hess=True))),  # 17
-        (fletcher, FX(SR1(init_with_hess=True))),  # 18
         (fletcher, SSM(0.0)),  # 19
         (fletcher, SSM(0.5)),  # 20
         (fletcher, SSM(1.0)),  # 21
@@ -494,3 +488,151 @@ def test_wrong_options():
             verbose=logging.INFO,
             options={Options.SUBSPACE_DIM: '2D'},
         )
+
+
+def test_hess0_initialization():
+    """
+    Test that hess0 parameter correctly initializes Hessian approximation.
+    """
+    lb, ub, x0 = finite_bounds_include_optimum()
+    fun = rosengrad
+    fun_with_hess = rosenboth
+
+    # Test 1: Verify hess0 is used when provided with hessian_update
+    custom_hess0 = np.eye(len(x0)) * 10.0
+    opt_with_hess0 = Optimizer(
+        fun,
+        ub=ub,
+        lb=lb,
+        verbose=logging.WARNING,
+        options={Options.MAXITER: 1},  # Only run one iteration
+        hessian_update=BFGS(),
+    )
+    opt_with_hess0.minimize(x0, hess0=custom_hess0)
+    assert opt_with_hess0.hess is not None
+
+    # Test 2: Verify default initialization when hess0 is not provided
+    opt_without_hess0 = Optimizer(
+        fun,
+        ub=ub,
+        lb=lb,
+        verbose=logging.WARNING,
+        options={Options.MAXITER: 1},
+        hessian_update=BFGS(),
+    )
+    opt_without_hess0.minimize(x0)
+
+    # Test 3: Verify hess0 has correct dimensions
+    wrong_dim_hess0 = np.eye(len(x0) + 1)
+    opt_wrong_dim = Optimizer(
+        fun,
+        ub=ub,
+        lb=lb,
+        verbose=logging.WARNING,
+        hessian_update=BFGS(),
+    )
+    with pytest.raises(ValueError):
+        opt_wrong_dim.minimize(x0, hess0=wrong_dim_hess0)
+
+    # Test 4: Verify hess0 works with different update schemes
+    for happ_class in [BFGS, DFP, SR1, Broyden]:
+        happ = happ_class() if happ_class is not Broyden else Broyden(phi=0.5)
+        custom_hess = np.eye(len(x0)) * 5.0
+        opt = Optimizer(
+            fun,
+            ub=ub,
+            lb=lb,
+            verbose=logging.WARNING,
+            options={Options.MAXITER: 2, Options.FATOL: 0},
+            hessian_update=happ,
+        )
+        opt.minimize(x0, hess0=custom_hess)
+        assert opt.iteration >= 1, f'Failed for {happ_class.__name__}'
+
+    # Test 5: Verify hess0 is ignored when no hessian_update is provided
+    opt_no_update = Optimizer(
+        fun_with_hess,
+        ub=ub,
+        lb=lb,
+        verbose=logging.WARNING,
+        options={Options.MAXITER: 1},
+    )
+    hess0_ignored = np.eye(len(x0)) * 100.0
+    opt_no_update.minimize(x0, hess0=hess0_ignored)
+
+    # Test 6: Test initialization with exact Hessian
+    opt_hess_init = Optimizer(
+        fun_with_hess,
+        ub=ub,
+        lb=lb,
+        verbose=logging.WARNING,
+        options={Options.MAXITER: 10, Options.FATOL: 1e-8},
+        hessian_update=HybridFixed(BFGS()),
+    )
+    opt_hess_init.minimize(x0, hess0='hess')
+    iterations_with_hess = opt_hess_init.iteration
+
+    # Compare with BFGS without using initial Hessian
+    opt_no_hess_init = Optimizer(
+        fun,
+        ub=ub,
+        lb=lb,
+        verbose=logging.WARNING,
+        options={Options.MAXITER: 10, Options.FATOL: 1e-8},
+        hessian_update=BFGS(),
+    )
+    opt_no_hess_init.minimize(x0)
+    iterations_without_hess = opt_no_hess_init.iteration
+
+    # Using exact Hessian for initialization should help convergence
+    assert iterations_with_hess <= iterations_without_hess or (
+        opt_hess_init.converged and opt_no_hess_init.converged
+    ), 'Hessian initialization should help convergence'
+
+    # Test 7: Verify hess0 affects convergence behavior
+    true_hess_at_x0 = np.array(
+        [
+            [1200 * x0[0] ** 2 - 400 * x0[1] + 2, -400 * x0[0]],
+            [-400 * x0[0], 200],
+        ]
+    )
+
+    opt_good_init = Optimizer(
+        fun,
+        ub=ub,
+        lb=lb,
+        verbose=logging.WARNING,
+        options={Options.MAXITER: 100, Options.FATOL: 1e-8},
+        hessian_update=BFGS(),
+    )
+    opt_good_init.minimize(x0, hess0=true_hess_at_x0)
+    iterations_good = opt_good_init.iteration
+
+    # Use a poor initial Hessian approximation
+    poor_hess = np.eye(len(x0)) * 0.01
+    opt_poor_init = Optimizer(
+        fun,
+        ub=ub,
+        lb=lb,
+        verbose=logging.WARNING,
+        options={Options.MAXITER: 100, Options.FATOL: 1e-8},
+        hessian_update=BFGS(),
+    )
+    opt_poor_init.minimize(x0, hess0=poor_hess)
+    iterations_poor = opt_poor_init.iteration
+
+    # Good initialization should converge in fewer or equal iterations
+    assert iterations_good <= iterations_poor or (
+        opt_good_init.converged and opt_poor_init.converged
+    ), 'Good Hessian initialization should help convergence'
+
+    # Test 8: Verify an unsupported hess0 string is rejected
+    opt_bad_str = Optimizer(
+        fun,
+        ub=ub,
+        lb=lb,
+        verbose=logging.WARNING,
+        hessian_update=BFGS(),
+    )
+    with pytest.raises(ValueError):
+        opt_bad_str.minimize(x0, hess0='hessian')