From afc7f0b8d6d86785fc6cbd6d9ca5085327d4f920 Mon Sep 17 00:00:00 2001
From: cfeitong
Date: Sat, 6 Jan 2018 00:57:23 +0800
Subject: [PATCH 1/6] local rulinalg

---
 Cargo.toml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 5502be8f..f0a46151 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,5 +17,6 @@ datasets = []
 
 [dependencies]
 num = { version = "0.1.41", default-features = false }
-rand = "0.4.1"
-rulinalg = { git = "https://github.com/AtheMathmo/rulinalg", rev = "1ed8b937" }
+rand = "0.4"
+# rulinalg = { git = "https://github.com/AtheMathmo/rulinalg", rev = "1ed8b937" }
+rulinalg = { path = "../rulinalg" }

From 31a1137f81c7353581b7f794447285742ce0b86e Mon Sep 17 00:00:00 2001
From: cfeitong
Date: Sun, 21 Jan 2018 13:26:10 +0800
Subject: [PATCH 2/6] save work

---
 src/learning/optim/grad_desc.rs | 43 ++++++++++++++++++++++++---------
 1 file changed, 32 insertions(+), 11 deletions(-)

diff --git a/src/learning/optim/grad_desc.rs b/src/learning/optim/grad_desc.rs
index 1e114877..b951b628 100644
--- a/src/learning/optim/grad_desc.rs
+++ b/src/learning/optim/grad_desc.rs
@@ -107,6 +107,8 @@ pub struct StochasticGD {
     mu: f64,
     /// The number of passes through the data.
     iters: usize,
+    /// Whether to use Nesterov momentum.
+    nesterov_momentum: bool,
 }
 
 /// The default Stochastic GD algorithm.
@@ -116,12 +118,14 @@ pub struct StochasticGD {
 /// - alpha = 0.1
 /// - mu = 0.1
 /// - iters = 20
+/// - nesterov_momentum = false
 impl Default for StochasticGD {
     fn default() -> StochasticGD {
         StochasticGD {
             alpha: 0.1,
             mu: 0.1,
             iters: 20,
+            nesterov_momentum: false,
         }
     }
 }
@@ -132,8 +136,6 @@ impl StochasticGD {
     /// Requires the learning rate, momentum rate and iteration count
     /// to be specified.
     ///
-    /// With Nesterov momentum by default.
-    ///
     /// # Examples
     ///
     /// ```
@@ -149,8 +151,23 @@ impl StochasticGD {
             alpha: alpha,
             mu: mu,
             iters: iters,
+            nesterov_momentum: false,
         }
     }
+
+    /// Enable Nesterov momentum for the stochastic gradient descent algorithm.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rusty_machine::learning::optim::grad_desc::StochasticGD;
+    ///
+    /// let sgd = StochasticGD::new(0.1, 0.3, 5).with_nesterov_momentum();
+    /// ```
+    pub fn with_nesterov_momentum(mut self) -> StochasticGD {
+        self.nesterov_momentum = true;
+        self
+    }
 }
 
 impl<M> OptimAlgorithm<M> for StochasticGD
@@ -184,15 +201,19 @@ impl<M> OptimAlgorithm<M> for StochasticGD
                                                       &inputs.select_rows(&[*i]),
                                                       &targets.select_rows(&[*i]));
 
-            // Backup previous velocity
-            let prev_w = delta_w.clone();
-            // Compute the difference in gradient using Nesterov momentum
-            delta_w = Vector::new(vec_data) * self.mu + &delta_w * self.alpha;
-            // Update the parameters
-            optimizing_val = &optimizing_val -
-                             (&prev_w * (-self.alpha) + &delta_w * (1. + self.alpha));
-            // Set the end cost (this is only used after the last iteration)
-            end_cost += cost;
+            if self.nesterov_momentum {
+                // Backup previous velocity
+                let prev_w = delta_w.clone();
+                // Compute the difference in gradient using Nesterov momentum
+                delta_w = Vector::new(vec_data) * self.mu + &delta_w * self.alpha;
+                // Update the parameters
+                optimizing_val = &optimizing_val -
+                                 (&prev_w * (-self.alpha) + &delta_w * (1. + self.alpha));
+                // Set the end cost (this is only used after the last iteration)
+                end_cost += cost;
+            } else {
+
+            }
         }
 
         end_cost /= inputs.rows() as f64;
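A note on the update rule patch 2 puts behind the new flag: with `g` the current gradient (`vec_data`), `v` the velocity (`delta_w`), `alpha` the decay on the old velocity and `mu` the gradient scale, the guarded branch computes `v <- mu*g + alpha*v` and then `w <- w - (-alpha*v_prev + (1 + alpha)*v)`. Below is a minimal standalone sketch of that arithmetic, using plain `f64` slices in place of rulinalg's `Vector`; the function and variable names are illustrative only, not part of the patch.

```rust
/// One Nesterov-style step mirroring the branch added in patch 2.
/// `params`/`velocity`/`grad` stand in for `optimizing_val`/`delta_w`/`vec_data`.
fn nesterov_step(params: &mut [f64], velocity: &mut [f64], grad: &[f64], alpha: f64, mu: f64) {
    for i in 0..params.len() {
        // Backup previous velocity (`prev_w` in the patch)
        let prev_v = velocity[i];
        // delta_w = vec_data * mu + delta_w * alpha
        velocity[i] = mu * grad[i] + alpha * prev_v;
        // optimizing_val -= prev_w * (-alpha) + delta_w * (1 + alpha)
        params[i] -= -alpha * prev_v + (1. + alpha) * velocity[i];
    }
}
```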
From 91f26b2d8fd91204300d0c9ba560e23f1583c4d0 Mon Sep 17 00:00:00 2001
From: cfeitong
Date: Sun, 21 Jan 2018 13:28:47 +0800
Subject: [PATCH 3/6] Revert "Nesterov Momentum"

This reverts commit ddc4c767b0ecd3d93c0b554ee05d880449957320.
---
 src/learning/optim/grad_desc.rs   | 9 ++-------
 tests/learning/optim/grad_desc.rs | 2 +-
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/src/learning/optim/grad_desc.rs b/src/learning/optim/grad_desc.rs
index 1e114877..471edca4 100644
--- a/src/learning/optim/grad_desc.rs
+++ b/src/learning/optim/grad_desc.rs
@@ -131,8 +131,6 @@ impl StochasticGD {
     ///
     /// Requires the learning rate, momentum rate and iteration count
     /// to be specified.
-    ///
-    /// With Nesterov momentum by default.
     ///
     /// # Examples
     ///
@@ -184,13 +182,10 @@ impl<M> OptimAlgorithm<M> for StochasticGD
                                                       &inputs.select_rows(&[*i]),
                                                       &targets.select_rows(&[*i]));
 
-            // Backup previous velocity
-            let prev_w = delta_w.clone();
-            // Compute the difference in gradient using Nesterov momentum
+            // Compute the difference in gradient using momentum
             delta_w = Vector::new(vec_data) * self.mu + &delta_w * self.alpha;
             // Update the parameters
-            optimizing_val = &optimizing_val -
-                             (&prev_w * (-self.alpha) + &delta_w * (1. + self.alpha));
+            optimizing_val = &optimizing_val - &delta_w * self.mu;
             // Set the end cost (this is only used after the last iteration)
             end_cost += cost;
         }
diff --git a/tests/learning/optim/grad_desc.rs b/tests/learning/optim/grad_desc.rs
index 9dd1281a..f9f74303 100644
--- a/tests/learning/optim/grad_desc.rs
+++ b/tests/learning/optim/grad_desc.rs
@@ -58,7 +58,7 @@ fn convex_gd_training() {
 fn convex_stochastic_gd_training() {
     let x_sq = XSqModel { c: 20f64 };
 
-    let gd = StochasticGD::new(0.9f64, 0.1f64, 100);
+    let gd = StochasticGD::new(0.5f64, 1f64, 100);
     let test_data = vec![100f64];
     let params = gd.optimize(&x_sq,
                              &test_data[..],

From f658b1a700f2ab2e2465fe3e7a9c8d5f5fbb5472 Mon Sep 17 00:00:00 2001
From: cfeitong
Date: Sun, 21 Jan 2018 13:30:37 +0800
Subject: [PATCH 4/6] make Nesterov momentum for SGD optional

---
 src/learning/optim/grad_desc.rs | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/learning/optim/grad_desc.rs b/src/learning/optim/grad_desc.rs
index b951b628..1a6bbb58 100644
--- a/src/learning/optim/grad_desc.rs
+++ b/src/learning/optim/grad_desc.rs
@@ -209,11 +209,15 @@ impl<M> OptimAlgorithm<M> for StochasticGD
                 // Update the parameters
                 optimizing_val = &optimizing_val -
                                  (&prev_w * (-self.alpha) + &delta_w * (1. + self.alpha));
-                // Set the end cost (this is only used after the last iteration)
-                end_cost += cost;
             } else {
-
+                // Compute the difference in gradient using momentum
+                delta_w = Vector::new(vec_data) * self.mu + &delta_w * self.alpha;
+                // Update the parameters
+                optimizing_val = &optimizing_val - &delta_w * self.mu;
             }
+
+            // Set the end cost (this is only used after the last iteration)
+            end_cost += cost;
         }
 
         end_cost /= inputs.rows() as f64;
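With patches 2 and 4 applied together, the Nesterov update is strictly opt-in and the default behaviour falls through to the classic momentum branch. A short usage sketch, assuming the patched crate is on the path (the constructor arguments are the same `alpha`, `mu`, `iters` triple used in the doc examples above):

```rust
use rusty_machine::learning::optim::grad_desc::StochasticGD;

fn main() {
    // Default: plain momentum (the `else` branch above).
    let _sgd = StochasticGD::new(0.1, 0.3, 5);
    // Opt in to the Nesterov update introduced in patch 2.
    let _nesterov_sgd = StochasticGD::new(0.1, 0.3, 5).with_nesterov_momentum();
}
```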
From fbe169b0c768531ccd9ccb1016023991e94971e5 Mon Sep 17 00:00:00 2001
From: cfeitong
Date: Sun, 21 Jan 2018 13:32:11 +0800
Subject: [PATCH 5/6] add test for Nesterov momentum

---
 tests/learning/optim/grad_desc.rs | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tests/learning/optim/grad_desc.rs b/tests/learning/optim/grad_desc.rs
index 9dd1281a..ad9d6abd 100644
--- a/tests/learning/optim/grad_desc.rs
+++ b/tests/learning/optim/grad_desc.rs
@@ -69,6 +69,21 @@ fn convex_stochastic_gd_training() {
     assert!(x_sq.compute_grad(&params, &Matrix::zeros(1, 1), &Matrix::zeros(1, 1)).0 < 1e-10);
 }
 
+#[test]
+fn convex_stochastic_gd_nesterov_momentum_training() {
+    let x_sq = XSqModel { c: 20f64 };
+
+    let gd = StochasticGD::new(0.9f64, 0.1f64, 100).with_nesterov_momentum();
+    let test_data = vec![100f64];
+    let params = gd.optimize(&x_sq,
+                             &test_data[..],
+                             &Matrix::zeros(100, 1),
+                             &Matrix::zeros(100, 1));
+
+    assert!((params[0] - 20f64).abs() < 1e-10);
+    assert!(x_sq.compute_grad(&params, &Matrix::zeros(1, 1), &Matrix::zeros(1, 1)).0 < 1e-10);
+}
+
 #[test]
 fn convex_adagrad_training() {
     let x_sq = XSqModel { c: 20f64 };

From ff681877d6e6b2b904b16c23f4b2190442fd765c Mon Sep 17 00:00:00 2001
From: cfeitong
Date: Sun, 21 Jan 2018 13:40:41 +0800
Subject: [PATCH 6/6] revert Cargo.toml

---
 Cargo.toml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index f0a46151..5502be8f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,6 +17,5 @@ datasets = []
 
 [dependencies]
 num = { version = "0.1.41", default-features = false }
-rand = "0.4"
-# rulinalg = { git = "https://github.com/AtheMathmo/rulinalg", rev = "1ed8b937" }
-rulinalg = { path = "../rulinalg" }
+rand = "0.4.1"
+rulinalg = { git = "https://github.com/AtheMathmo/rulinalg", rev = "1ed8b937" }
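For reference, the new test in patch 5 minimizes `f(x) = (x - c)^2` with `c = 20`, whose gradient is `2(x - c)`; `XSqModel` lives in the existing test file and is not shown in these patches. Below is a self-contained scalar sketch of the convergence the test asserts, assuming (as the `Matrix::zeros(100, 1)` inputs and the `end_cost /= inputs.rows()` line suggest) that each of the 100 passes performs one stochastic update per input row:

```rust
// Scalar re-enactment of convex_stochastic_gd_nesterov_momentum_training.
// `grad` is a hypothetical stand-in for XSqModel::compute_grad.
fn grad(x: f64, c: f64) -> f64 {
    2. * (x - c)
}

fn main() {
    let (alpha, mu, iters, rows) = (0.9, 0.1, 100, 100); // the test's hyperparameters
    let c = 20.;
    let mut x = 100.; // the test starts the parameter at 100.0
    let mut v = 0.;
    for _ in 0..(iters * rows) {
        let prev_v = v;
        // Velocity and parameter updates as in the Nesterov branch of patch 2.
        v = mu * grad(x, c) + alpha * v;
        x -= -alpha * prev_v + (1. + alpha) * v;
    }
    assert!((x - c).abs() < 1e-10);
    println!("converged to {}", x);
}
```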