From 82f476bd18bea6034d087e0103eff5617d975fe1 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Thu, 8 Nov 2018 13:16:13 +0000 Subject: [PATCH 01/37] Re-enable truly_random optimizer - Task-migration still fails but much less frequently --- src/optimizer.cpp | 12 +++++++++++- src/scheduler.cpp | 14 ++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/optimizer.cpp b/src/optimizer.cpp index 24419b2..e54f678 100644 --- a/src/optimizer.cpp +++ b/src/optimizer.cpp @@ -453,7 +453,7 @@ hpx::future global_optimizer::decide_random_mapping(const std::vector global_optimizer::decide_random_mapping(const std::vector global_optimizer::balance_ino(const std::vector & #ifdef INO_DEBUG_DECIDE_SCHEDULE std::cerr << "Ino picked a schedule" << std::endl; #endif + for (auto node_wis : ino_schedule) for (auto wi : node_wis.second.v_work_items) new_mapping[wi] = node_wis.first; diff --git a/src/scheduler.cpp b/src/scheduler.cpp index 43d9605..662151c 100644 --- a/src/scheduler.cpp +++ b/src/scheduler.cpp @@ -188,6 +188,8 @@ namespace allscale return "ino"; case random: return "random"; + case truly_random: + return "truly_random"; default: return "unknown"; } @@ -224,6 +226,13 @@ namespace allscale tree_scheduling_policy::create_uniform(allscale::get_num_localities()) }; } + if (policy == "truly_random") + { + return { + replacable_policy::truly_random, + tree_scheduling_policy::create_uniform(allscale::get_num_localities()) + }; + } if (policy == "random") { return { @@ -394,6 +403,11 @@ namespace allscale optimizer_.balance_ino(old.task_distribution_mapping()); } + if (policy_.value_ == replacable_policy::truly_random) { + tree_scheduling_policy const& old = static_cast(*policy_.policy_); + optimizer_.decide_random_mapping(old.task_distribution_mapping()); + } + return true; } From 7ba05c445d1a4047ebf4b31fef5b5af196213cb7 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Fri, 9 Nov 2018 13:58:45 +0000 Subject: [PATCH 02/37] Power instead of 
Energy for INO, plus some integration with Dashboard - Dashboard displays power as a fraction of the current power consumption over the maximum power consumption. - It uses a model to generate these values. That can be inaccurate. - This commit, reads the real power consumption but does not supply a maximum power consumption. --- src/components/localoptimizer.cpp | 1 - src/components/monitor_component.cpp | 36 +++++++++++++++++++++++++++- src/dashboard.cpp | 4 +++- src/optimizer.cpp | 11 ++++----- 4 files changed, 43 insertions(+), 9 deletions(-) diff --git a/src/components/localoptimizer.cpp b/src/components/localoptimizer.cpp index 86faa91..1fae44c 100644 --- a/src/components/localoptimizer.cpp +++ b/src/components/localoptimizer.cpp @@ -19,7 +19,6 @@ // only meant to be defined if one needs to measure the efficacy // of the scheduler //#define ALLSCALE_HAVE_CPUFREQ 1 -#define ALLSCALE_USE_CORE_OFFLINING 1 namespace allscale { namespace components { diff --git a/src/components/monitor_component.cpp b/src/components/monitor_component.cpp index 667046b..947bac4 100644 --- a/src/components/monitor_component.cpp +++ b/src/components/monitor_component.cpp @@ -26,6 +26,11 @@ #include +#ifdef ALLSCALE_HAVE_CPUFREQ +#define POWER_MEASUREMENT_PERIOD_MS 100 +#include +#endif + #ifdef HAVE_PAPI #include #include @@ -329,13 +334,42 @@ namespace allscale { namespace components { float monitor::get_current_power() { +#ifdef ALLSCALE_HAVE_CPUFREQ + /*VV: Read potentially multiple measurements of power within the span of + POWER_MEASUREMENT_PERIOD_MS milliseconds. 
Each time this function + is invoked it returns the running average of power.*/ + static unsigned long long times_read_power=1; + static unsigned long long power_sum = util::hardware_reconf::read_system_power(); + + static long timestamp_reset_power = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + + long t_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + + auto dt = t_now - timestamp_reset_power; + times_read_power ++; + + power_sum += util::hardware_reconf::read_system_power(); + + float ret = power_sum / (float)(times_read_power); + + if ( dt >= POWER_MEASUREMENT_PERIOD_MS ) { + times_read_power = 0; + power_sum = 0ull; + timestamp_reset_power = t_now; + } + + return ret; +#else return allscale::power::estimate_power(get_current_freq(0)) * num_cpus_; +#endif } float monitor::get_max_power() { -#ifdef POWER_ESTIMATE +#if defined(ALLSCALE_HAVE_CPUFREQ) + return 0.0; +#elif defined(POWER_ESTIMATE) return allscale::power::estimate_power(get_max_freq(0)) * num_cpus_; #else return 0.0; diff --git a/src/dashboard.cpp b/src/dashboard.cpp index 0adc50f..ed528c1 100644 --- a/src/dashboard.cpp +++ b/src/dashboard.cpp @@ -60,7 +60,9 @@ namespace allscale { namespace dashboard state.speed = 1.f - state.idle_rate; state.efficiency = state.speed * ((state.cur_frequency * active_cores) / (state.max_frequency * state.num_cores)); -#ifdef POWER_ESTIMATE +#ifdef ALLSCALE_HAVE_CPUFREQ + state.power = monitor_c->get_current_power(); +#elif defined(POWER_ESTIMATE) state.cur_power = monitor_c->get_current_power(); state.max_power = monitor_c->get_max_power(); state.power = state.cur_power / state.max_power; diff --git a/src/optimizer.cpp b/src/optimizer.cpp index e54f678..d35ed9a 100644 --- a/src/optimizer.cpp +++ b/src/optimizer.cpp @@ -29,7 +29,6 @@ namespace allscale { optimizer_state get_optimizer_state() { - static float last_energy = 0.f; float load = 1.f - 
monitor::get().get_idle_rate(); float my_time = monitor::get().get_avg_time_last_iterations(HISTORY_ITERATIONS); @@ -37,16 +36,16 @@ namespace allscale my_time = -1.f; allscale::components::monitor *monitor_c = &allscale::monitor::get(); - float energy = 100.f; -#ifdef POWER_ESTIMATE - energy = monitor_c->get_current_power(); + float power_now = 100.f; +#if defined(POWER_ESTIMATE) || defined(ALLSCALE_HAVE_CPUFREQ) + power_now = monitor_c->get_current_power(); #endif - + // VV: Use power as if it were energy return { load, monitor::get().get_task_times(), my_time, - energy, + power_now, float(monitor_c->get_current_freq(0)), scheduler::get().get_active_threads() }; From 3b2d71fa7a29c9468f506344183f1e4104cb53c4 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Mon, 12 Nov 2018 10:57:00 +0000 Subject: [PATCH 03/37] Patching up intra-node optimizer --- allscale/components/localoptimizer.hpp | 15 ++++--- src/components/localoptimizer.cpp | 56 +++++++++++++++++--------- src/components/nmsimplex_bbincr.cpp | 12 +++--- src/components/scheduler_component.cpp | 47 ++++++++------------- 4 files changed, 69 insertions(+), 61 deletions(-) diff --git a/allscale/components/localoptimizer.hpp b/allscale/components/localoptimizer.hpp index c0e588a..1f7aae0 100644 --- a/allscale/components/localoptimizer.hpp +++ b/allscale/components/localoptimizer.hpp @@ -15,7 +15,8 @@ //#define MEASURE_MANUAL_ 1 #define MEASURE_ 1 -//#define DEBUG_ 1 +#define DEBUG_ 1 +#define DEBUG_MULTIOBJECTIVE_ 1 namespace allscale { namespace components { @@ -71,7 +72,8 @@ namespace allscale { namespace components { /* index to the global cpu-supported frequencies vector pointing to the new frequency to be set. 
If set to -1, frequency will stay unchanged */ - unsigned int frequency_idx; + int frequency_idx; + int previous_frequency_idx; #endif }; @@ -124,6 +126,9 @@ namespace allscale { namespace components { return frequencies_param_allowed_; } #endif + std::size_t getmaxthreads() { + return max_threads_; + } void setmaxthreads(std::size_t threads){ max_threads_=threads; @@ -197,12 +202,6 @@ namespace allscale { namespace components { /* vector containing sorted list of frequencies supported by the processor */ std::vector frequencies_param_allowed_; - - /* index to the vector of allowed frequencies that points to the highest - frequency. The ordering of the vector, as reported by hardware - reconfiguration can be platform specific, and therefore we need this - index to make sorted access to the vector platform agnostic */ - const short unsigned int highest_frequency_allowed_idx_ = 0; #endif /* threshold (percentage in [0,1]) to decide convergence of optimization diff --git a/src/components/localoptimizer.cpp b/src/components/localoptimizer.cpp index 1fae44c..0d5d59b 100644 --- a/src/components/localoptimizer.cpp +++ b/src/components/localoptimizer.cpp @@ -10,9 +10,9 @@ #include #include -//#define DEBUG_ 1 +#define DEBUG_ 1 //#define DEBUG_INIT_ 1 // define to generate output during scheduler initialization -//#define DEBUG_MULTIOBJECTIVE_ 1 +#define DEBUG_MULTIOBJECTIVE_ 1 //#define DEBUG_CONVERGENCE_ 1 //#define MEASURE_MANUAL 1 // define to generate output consumed by the regression test #define MEASURE_ 1 @@ -109,6 +109,9 @@ void localoptimizer::printobjectives(){ } void localoptimizer::printverbosesteps(actuation act){ + static int last_frequency_idx = 0; + + std::cout << "[INFO]"; if (optmethod_==random) std::cout << "Random "; @@ -116,18 +119,24 @@ void localoptimizer::printverbosesteps(actuation act){ std::cout << "Allscale "; } std::cout << "Scheduler Step: Setting OS Threads to " << threads_param_; -#ifdef ALLSCALE_HAVE_CPUFREQ - std::cout << ", CPU Frequency 
to " << frequencies_param_allowed_[act.frequency_idx] + #ifdef ALLSCALE_HAVE_CPUFREQ + if ( act.frequency_idx >= 0 ) + last_frequency_idx = act.frequency_idx; + std::cout << " , CPU Frequency to " << frequencies_param_allowed_[last_frequency_idx] << std::endl; #else std::cout << std::endl; #endif - } #endif void localoptimizer::measureObjective(double iter_time, double power, double threads){ + std::cout <<"Measuring objective: " + << iter_time << " " + << power << " " + << threads << std::endl; + for(auto& el: objectives_){ switch (el.type){ case time: @@ -235,18 +244,18 @@ actuation localoptimizer::step() /* random optimization step */ if (optmethod_ == random) { - act.delta_threads = (rand() % max_threads_) - threads_param_; + act.delta_threads = (rand() % max_threads_); #ifdef ALLSCALE_HAVE_CPUFREQ act.frequency_idx = rand() % frequencies_param_allowed_.size(); - if (act.frequency_idx == frequency_param_) - act.frequency_idx = -1; + // if (act.frequency_idx == frequency_param_) + // act.frequency_idx = -1; #endif } else if (optmethod_ == allscale) { if (current_objective_idx_ > objectives_.size()) - return act; + goto validate_act; if (steps_ < warmup_steps_) { @@ -260,7 +269,7 @@ actuation localoptimizer::step() #ifdef ALLSCALE_HAVE_CPUFREQ act.frequency_idx = rand() % frequencies_param_allowed_.size(); #endif - return act; + goto validate_act; } // iterate over all objectives in decreasing priority @@ -305,7 +314,7 @@ actuation localoptimizer::step() double constraint_min[]={1,0}; double constraint_max[]={(double)max_threads_, (double)frequencies_param_allowed_.size()}; - + std::cout << "initialize_simplex::Initializing with " << frequencies_param_allowed_.size() << " frequencies" << std::endl; nmd.initialize_simplex(params,values,constraint_min,constraint_max); objectives_[current_objective_idx_].initialized=true; #endif @@ -420,8 +429,6 @@ actuation localoptimizer::step() act.frequency_idx = (int)priority_obj.minimization_params[1]* 
(max_leeway_value/priority_obj.converged_minimum); #endif - //act.delta_threads=minimization_point[0]; - //act.frequency_idx=minimization_point[1]; current_objective_idx_++; if (current_objective_idx_ == objectives_.size()) { @@ -430,15 +437,28 @@ actuation localoptimizer::step() std::cout << "[LOCALOPTIMIZER|INFO] ALL OBJECTIVES HAVE CONVERGED " << std::endl; #endif } - return act; - } - } - act.delta_threads=(nmd_res.threads==0)?getCurrentThreads():nmd_res.threads; + act.delta_threads=(nmd_res.threads==0)?getCurrentThreads():nmd_res.threads; #ifdef ALLSCALE_HAVE_CPUFREQ - act.frequency_idx=nmd_res.freq_idx; + act.frequency_idx=nmd_res.freq_idx; #endif + + goto validate_act; + } + } } } + validate_act: + + if ( act.delta_threads > max_threads_) { + act.delta_threads = max_threads_; + } else if ( act.delta_threads < 1 ) { + act.delta_threads = getCurrentThreads(); + } +#ifdef ALLSCALE_HAVE_CPUFREQ + // VV: If freq_idx is -1 then set it to last used frequency (frequency_param_) + if ( act.frequency_idx < 0) + act.frequency_idx= frequency_param_; +#endif return act; } } diff --git a/src/components/nmsimplex_bbincr.cpp b/src/components/nmsimplex_bbincr.cpp index 97736cd..a0c964e 100644 --- a/src/components/nmsimplex_bbincr.cpp +++ b/src/components/nmsimplex_bbincr.cpp @@ -11,8 +11,8 @@ */ #include -//#define NMD_DEBUG_ 1 -//#define NMD_INFO_ 1 +#define NMD_DEBUG_ 1 +#define NMD_INFO_ 1 /* create the initial simplex @@ -212,11 +212,13 @@ optstepresult NelderMead::step(double param) for (j=0;j<=n-1;j++) { /*vr[j] = (1+ALPHA)*vm[j] - ALPHA*v[vg][j];*/ /* - std::cout << "vm[" << j << "]=" << vm[j] << std::endl; - std::cout << "v[vg" << j << "]=" << v[vg][j] << std::endl; - std::cout << "ALPHA=" << ALPHA << std::endl; */ vr[j] = vm[j]+ALPHA*(vm[j]-v[vg][j]); + + // std::cout << "vm[" << j << "]=" << vm[j] << std::endl; + // std::cout << "v[vg" << j << "]=" << v[vg][j] << std::endl; + // std::cout << "ALPHA=" << ALPHA << std::endl; + // std::cout << "Vr[" << j << "]=" << 
vr[j] << std::endl; } my_constraints(vr); #ifdef NMD_DEBUG_ diff --git a/src/components/scheduler_component.cpp b/src/components/scheduler_component.cpp index 7185b23..678d539 100644 --- a/src/components/scheduler_component.cpp +++ b/src/components/scheduler_component.cpp @@ -22,7 +22,7 @@ //#define DEBUG_ 1 //#define DEBUG_INIT_ 1 // define to generate output during scheduler initialization -//#define DEBUG_MULTIOBJECTIVE_ 1 +#define DEBUG_MULTIOBJECTIVE_ 1 //#define DEBUG_THREADTHROTTLING_ 1 //#define DEBUG_THREADSTATUS_ 1 //#define DEBUG_FREQSCALING_ 1 @@ -719,7 +719,6 @@ void scheduler::optimize_locally(work_item const& work) /* Count Active threads for validation*/ hpx::threads::mask_type active_mask; - std::size_t active_threads_ = 0; std::size_t domain_active_threads = 0; std::size_t pool_idx = 0; int total_threads_counted=0; @@ -741,14 +740,13 @@ void scheduler::optimize_locally(work_item const& work) #ifdef MEASURE_ #ifdef ALLSCALE_HAVE_CPUFREQ std::size_t temp_id = work.id().id; - if ((temp_id >= period_for_power) && - (temp_id % period_for_power == 0)) + if ((temp_id >= period_for_power) && (temp_id % period_for_power == 0)) update_power_consumption(hardware_reconf::read_system_power()); #endif #endif #ifdef ALLSCALE_HAVE_CPUFREQ - if (uselopt && !lopt_.isConverged()){ + if (uselopt && !lopt_.isConverged()) { last_power_usage++; current_power_usage = hardware_reconf::read_system_power(); power_sum += current_power_usage; @@ -775,7 +773,8 @@ void scheduler::optimize_locally(work_item const& work) } lopt_.measureObjective(current_avg_iter_time,power_sum/last_power_usage, - active_threads); + // active_threads + lopt_.getCurrentThreads()); last_power_usage=0; power_sum=0; } @@ -790,39 +789,27 @@ void scheduler::optimize_locally(work_item const& work) lopt_.printverbosesteps(act_temp); #endif // amend threads if signaled - /* - if (act_temp.delta_threads<0){ - unsigned int suspended_temp = - suspend_threads(-1 * act_temp.delta_threads); - 
lopt_.setCurrentThreads(lopt_.getCurrentThreads()-suspended_temp); - } - else if (act_temp.delta_threads>0){ - unsigned int resumed_temp = - resume_threads(act_temp.delta_threads); - lopt_.setCurrentThreads(lopt_.getCurrentThreads()+resumed_temp); - } - */ - + if (act_temp.delta_threads < active_threads){ #ifdef DEBUG_MULTIOBJECTIVE_ - int new_threads_target = (int)active_threads - act_temp.delta_threads; - std::cout << "[SCHEDULER|INFO]: Optimizer induced threads to suspend: " << new_threads_target << std::endl; - std::cout << "[SCHEDULER|INFO]: Active Threads = " << active_threads << ", target threads = " << act_temp.delta_threads << std::endl; -#endif + std::cout << "[SCHEDULER|INFO]: Active Threads = " << active_threads << " out of " << lopt_.getmaxthreads() + << " , target threads = " << act_temp.delta_threads << std::endl; + +#endif //unsigned int suspended_temp = suspend_threads(new_threads_target); //lopt_.setCurrentThreads(lopt_.getCurrentThreads()-suspended_temp); - - lopt_.setCurrentThreads(active_threads); + suspend_threads(active_threads-act_temp.delta_threads); } else if (act_temp.delta_threads > active_threads){ #ifdef DEBUG_MULTIOBJECTIVE_ - int new_threads_target = act_temp.delta_threads - (int)active_threads; - std::cout << "[SCHEDULER|INFO]: Optimizer induced threads to resume to: " << new_threads_target << std::endl; - std::cout << "[SCHEDULER|INFO]: Active Threads = " << active_threads << ", target threads = " << act_temp.delta_threads << std::endl; + std::cout << "[SCHEDULER|INFO]: Active Threads = " << active_threads << " out of " << lopt_.getmaxthreads() + << " , target threads = " << act_temp.delta_threads << std::endl; #endif - fix_allcores_frequencies(act_temp.frequency_idx); - lopt_.setCurrentFrequencyIdx(act_temp.frequency_idx); + resume_threads(act_temp.delta_threads - active_threads); } + fix_allcores_frequencies(act_temp.frequency_idx); + lopt_.setCurrentFrequencyIdx(act_temp.frequency_idx); + 
lopt_.setCurrentThreads(act_temp.delta_threads); } } // uselopt #endif From a673f4344ac6e856f47be32f534b0b1e31e0bde6 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Tue, 13 Nov 2018 08:58:18 +0000 Subject: [PATCH 04/37] Optimize NelderMead --- allscale/components/nmsimplex_bbincr.hpp | 12 +- src/components/localoptimizer.cpp | 685 +++++++++++---------- src/components/nmsimplex_bbincr.cpp | 744 ++++++++++++----------- src/components/scheduler_component.cpp | 21 +- src/components/util/hardware_reconf.cpp | 2 + 5 files changed, 798 insertions(+), 666 deletions(-) diff --git a/allscale/components/nmsimplex_bbincr.hpp b/allscale/components/nmsimplex_bbincr.hpp index f894d2b..ea4f3bd 100644 --- a/allscale/components/nmsimplex_bbincr.hpp +++ b/allscale/components/nmsimplex_bbincr.hpp @@ -30,6 +30,7 @@ namespace allscale { namespace components { #define ALPHA 1.0 /* reflection coefficient */ #define BETA 0.5 /* contraction coefficient */ #define GAMMA 2.0 /* expansion coefficient */ +#define DELTA 0.5 /* shrinking coefficient */ /* structure type of a single optimization step return status */ struct optstepresult{ @@ -42,7 +43,8 @@ struct optstepresult{ }; /* enumeration encoding state that the incremental Nelder Mead optimizer is at */ -enum iterationstates {start, reflection, expansion, contraction}; +enum iterationstates {start, reflection, expansion, + contraction, shrink}; class NelderMead { @@ -63,9 +65,17 @@ class NelderMead { unsigned long int getIterations(){return itr;} private: + + optstepresult do_step_start(double param); + optstepresult do_step_reflect(double param); + optstepresult do_step_expand(double param); + optstepresult do_step_contract(double param); + optstepresult do_step_shrink(double param); + int vg_index(); int vs_index(); int vh_index(); + void sort_vertices(void); void my_constraints(double*); void centroid(); bool testConvergence(); diff --git a/src/components/localoptimizer.cpp b/src/components/localoptimizer.cpp index 
0d5d59b..593853b 100644 --- a/src/components/localoptimizer.cpp +++ b/src/components/localoptimizer.cpp @@ -13,117 +13,130 @@ #define DEBUG_ 1 //#define DEBUG_INIT_ 1 // define to generate output during scheduler initialization #define DEBUG_MULTIOBJECTIVE_ 1 -//#define DEBUG_CONVERGENCE_ 1 +#define DEBUG_CONVERGENCE_ 1 //#define MEASURE_MANUAL 1 // define to generate output consumed by the regression test #define MEASURE_ 1 // only meant to be defined if one needs to measure the efficacy // of the scheduler //#define ALLSCALE_HAVE_CPUFREQ 1 -namespace allscale { -namespace components { +namespace allscale +{ +namespace components +{ localoptimizer::localoptimizer(std::list targetobjectives) - : objectives_((int)targetobjectives.size()), - nmd(0.01), - param_changes_(0), - steps_(0), - current_param_(thread), - converged_(false) + : objectives_((int)targetobjectives.size()), + nmd(convergence_threshold_), + param_changes_(0), + steps_(0), + current_param_(thread), + converged_(false) +{ + for (objective o : targetobjectives) { - for (objective o : targetobjectives) { - //std::cout << o.type << "," << o.leeway << "," << o.priority << '\n'; - objectives_[o.priority] = o; - objectives_[o.priority].localmin=10000; - objectives_[o.priority].globalmin=10000; - objectives_[o.priority].localmax=0.0; - objectives_[o.priority].globalmax=0.0; - objectives_[o.priority].converged=false; - objectives_[o.priority].initialized=false; - objectives_[o.priority].min_params_idx=0; - objectives_[o.priority].converged_minimum=0; - } + //std::cout << o.type << "," << o.leeway << "," << o.priority << '\n'; + objectives_[o.priority] = o; + objectives_[o.priority].localmin = 10000; + objectives_[o.priority].globalmin = 10000; + objectives_[o.priority].localmax = 0.0; + objectives_[o.priority].globalmax = 0.0; + objectives_[o.priority].converged = false; + objectives_[o.priority].initialized = false; + objectives_[o.priority].min_params_idx = 0; + objectives_[o.priority].converged_minimum = 
0; + } #ifdef ALLSCALE_HAVE_CPUFREQ - setCurrentFrequencyIdx(0); + setCurrentFrequencyIdx(0); #endif }; -void localoptimizer::setobjectives(std::list targetobjectives){ +void localoptimizer::setobjectives(std::list targetobjectives) +{ objectives_.clear(); objectives_.resize((int)targetobjectives.size()); - for (objective o : targetobjectives) { + for (objective o : targetobjectives) + { //std::cout << o.type << "," << o.leeway << "," << o.priority << '\n'; objectives_[o.priority] = o; - objectives_[o.priority].localmin=10000; - objectives_[o.priority].globalmin=10000; - objectives_[o.priority].localmax=0.0; - objectives_[o.priority].globalmax=0.0; - objectives_[o.priority].converged=false; - objectives_[o.priority].initialized=false; - objectives_[o.priority].min_params_idx=0; - objectives_[o.priority].converged_minimum=0; + objectives_[o.priority].localmin = 10000; + objectives_[o.priority].globalmin = 10000; + objectives_[o.priority].localmax = 0.0; + objectives_[o.priority].globalmax = 0.0; + objectives_[o.priority].converged = false; + objectives_[o.priority].initialized = false; + objectives_[o.priority].min_params_idx = 0; + objectives_[o.priority].converged_minimum = 0; } - steps_=0; - param_changes_=0; - current_param_=thread; + steps_ = 0; + param_changes_ = 0; + current_param_ = thread; #ifdef ALLSCALE_HAVE_CPUFREQ setCurrentFrequencyIdx(0); #endif - converged_=false; + converged_ = false; } -void localoptimizer::reset(int threads, int freq_idx){ +void localoptimizer::reset(int threads, int freq_idx) +{ threads_param_ = threads; - param_changes_=0; + param_changes_ = 0; thread_param_values_.clear(); #ifdef ALLSCALE_HAVE_CPUFREQ - frequency_param_= freq_idx; + frequency_param_ = freq_idx; frequency_param_values_.clear(); #endif - current_objective_idx_=0; - steps_=0; - current_param_=thread; - converged_=false; + current_objective_idx_ = 0; + steps_ = 0; + current_param_ = thread; + converged_ = false; }; #ifdef DEBUG_ -void 
localoptimizer::printobjectives(){ - for(auto& el: objectives_){ - std::cout << "Objective" << "\t\t" << "Priority" << "\t\t" << "Leeway" << - std::endl; - switch (el.type){ - case time: - std::cout << "Time" << "\t\t" << el.priority << "\t\t" << el.leeway << - std::endl; - break; - case energy: - std::cout << "Energy" << "\t\t" << el.priority << "\t\t" << el.leeway << - std::endl; - break; - case resource: - std::cout << "Resource" << "\t\t" << el.priority << "\t\t" << el.leeway << - std::endl; - break; +void localoptimizer::printobjectives() +{ + for (auto &el : objectives_) + { + std::cout << "Objective" + << "\t\t" + << "Priority" + << "\t\t" + << "Leeway" << std::endl; + switch (el.type) + { + case time: + std::cout << "Time" + << "\t\t" << el.priority << "\t\t" << el.leeway << std::endl; + break; + case energy: + std::cout << "Energy" + << "\t\t" << el.priority << "\t\t" << el.leeway << std::endl; + break; + case resource: + std::cout << "Resource" + << "\t\t" << el.priority << "\t\t" << el.leeway << std::endl; + break; } } } -void localoptimizer::printverbosesteps(actuation act){ +void localoptimizer::printverbosesteps(actuation act) +{ static int last_frequency_idx = 0; - std::cout << "[INFO]"; - if (optmethod_==random) + if (optmethod_ == random) std::cout << "Random "; - else if (optmethod_==allscale){ + else if (optmethod_ == allscale) + { std::cout << "Allscale "; } std::cout << "Scheduler Step: Setting OS Threads to " << threads_param_; - #ifdef ALLSCALE_HAVE_CPUFREQ - if ( act.frequency_idx >= 0 ) +#ifdef ALLSCALE_HAVE_CPUFREQ + if (act.frequency_idx >= 0) last_frequency_idx = act.frequency_idx; std::cout << " , CPU Frequency to " << frequencies_param_allowed_[last_frequency_idx] - << std::endl; + << std::endl; #else std::cout << std::endl; #endif @@ -131,335 +144,367 @@ void localoptimizer::printverbosesteps(actuation act){ #endif -void localoptimizer::measureObjective(double iter_time, double power, double threads){ - std::cout <<"Measuring 
objective: " - << iter_time << " " - << power << " " +void localoptimizer::measureObjective(double iter_time, double power, double threads) +{ + std::cout << "Measuring objective: " + << iter_time << " " + << power << " " << threads << std::endl; - - for(auto& el: objectives_){ - switch (el.type){ - case time: - el.samples.insert(el.samples.begin(),iter_time); - if (el.samples.size()>1000) - el.samples.resize(500); - - el.threads_samples.insert(el.threads_samples.begin(),threads); - if (el.threads_samples.size()>1000) - el.threads_samples.resize(500); + + for (auto &el : objectives_) + { + switch (el.type) + { + case time: + el.samples.insert(el.samples.begin(), iter_time); + if (el.samples.size() > 1000) + el.samples.resize(500); + + el.threads_samples.insert(el.threads_samples.begin(), threads); + if (el.threads_samples.size() > 1000) + el.threads_samples.resize(500); #ifdef ALLSCALE_HAVE_CPUFREQ - el.freq_samples.insert(el.freq_samples.begin(),getCurrentFrequencyIdx()); - if (el.freq_samples.size()>1000) - el.freq_samples.resize(500); + el.freq_samples.insert(el.freq_samples.begin(), getCurrentFrequencyIdx()); + if (el.freq_samples.size() > 1000) + el.freq_samples.resize(500); #endif - if (el.globalmin > iter_time){ - el.globalmin = iter_time; - el.min_params_idx=param_changes_; - } - if (el.globalmax < iter_time) - el.globalmax = iter_time; + if (el.globalmin > iter_time) + { + el.globalmin = iter_time; + el.min_params_idx = param_changes_; + } + if (el.globalmax < iter_time) + el.globalmax = iter_time; #ifdef DEBUG__ - std::cout << "Iteration Time Minimum: " << el.globalmin << std::endl; - std::cout << "Iteration Time Maximum: " << el.globalmax << std::endl; - std::cout << "Iteration Time Samples: "; - for(auto& samp: el.samples) - std::cout << samp << ","; - std::cout << std::endl; -#endif - break; - case energy: - el.samples.insert(el.samples.begin(),power); - if (el.samples.size()>1000) - el.samples.resize(500); - - 
el.threads_samples.insert(el.threads_samples.begin(),threads); - if (el.threads_samples.size()>1000) - el.threads_samples.resize(500); + std::cout << "Iteration Time Minimum: " << el.globalmin << std::endl; + std::cout << "Iteration Time Maximum: " << el.globalmax << std::endl; + std::cout << "Iteration Time Samples: "; + for (auto &samp : el.samples) + std::cout << samp << ","; + std::cout << std::endl; +#endif + break; + case energy: + el.samples.insert(el.samples.begin(), power); + if (el.samples.size() > 1000) + el.samples.resize(500); + + el.threads_samples.insert(el.threads_samples.begin(), threads); + if (el.threads_samples.size() > 1000) + el.threads_samples.resize(500); #ifdef ALLSCALE_HAVE_CPUFREQ - el.freq_samples.insert(el.freq_samples.begin(),getCurrentFrequencyIdx()); - if (el.freq_samples.size()>1000) - el.freq_samples.resize(500); + el.freq_samples.insert(el.freq_samples.begin(), getCurrentFrequencyIdx()); + if (el.freq_samples.size() > 1000) + el.freq_samples.resize(500); #endif - if (el.globalmin > power){ - el.globalmin = power; - el.min_params_idx=param_changes_; - } - if (el.globalmax < power) - el.globalmax = power; + if (el.globalmin > power) + { + el.globalmin = power; + el.min_params_idx = param_changes_; + } + if (el.globalmax < power) + el.globalmax = power; #ifdef DEBUG__ - std::cout << "Power Consumption Minimum: " << el.globalmin << std::endl; - std::cout << "Power Consumption Maximum: " << el.globalmax << std::endl; - std::cout << "Power Consumption Samples: "; - for(auto& samp: el.samples) - std::cout << samp << ","; - std::cout << std::endl; -#endif - break; - case resource: - el.samples.insert(el.samples.begin(),threads); - if (el.samples.size()>1000) - el.samples.resize(500); - - el.threads_samples.insert(el.threads_samples.begin(),threads); - if (el.threads_samples.size()>1000) - el.threads_samples.resize(500); + std::cout << "Power Consumption Minimum: " << el.globalmin << std::endl; + std::cout << "Power Consumption Maximum: " 
<< el.globalmax << std::endl; + std::cout << "Power Consumption Samples: "; + for (auto &samp : el.samples) + std::cout << samp << ","; + std::cout << std::endl; +#endif + break; + case resource: + el.samples.insert(el.samples.begin(), threads); + if (el.samples.size() > 1000) + el.samples.resize(500); + + el.threads_samples.insert(el.threads_samples.begin(), threads); + if (el.threads_samples.size() > 1000) + el.threads_samples.resize(500); #ifdef ALLSCALE_HAVE_CPUFREQ - el.freq_samples.insert(el.freq_samples.begin(),getCurrentFrequencyIdx()); - if (el.freq_samples.size()>1000) - el.freq_samples.resize(500); + el.freq_samples.insert(el.freq_samples.begin(), getCurrentFrequencyIdx()); + if (el.freq_samples.size() > 1000) + el.freq_samples.resize(500); #endif - if (el.globalmin > threads){ - el.globalmin = threads; - el.min_params_idx=param_changes_; - } - if (el.globalmax < threads) - el.globalmax = threads; + if (el.globalmin > threads) + { + el.globalmin = threads; + el.min_params_idx = param_changes_; + } + if (el.globalmax < threads) + el.globalmax = threads; #ifdef DEBUG__ - std::cout << "Threads Minimum: " << el.globalmin << std::endl; - std::cout << "Threads Maximum: " << el.globalmax << std::endl; - std::cout << "Threads Samples: "; - for(auto& samp: el.samples) - std::cout << samp << ","; - std::cout << std::endl; -#endif - break; + std::cout << "Threads Minimum: " << el.globalmin << std::endl; + std::cout << "Threads Maximum: " << el.globalmax << std::endl; + std::cout << "Threads Samples: "; + for (auto &samp : el.samples) + std::cout << samp << ","; + std::cout << std::endl; +#endif + break; } } } actuation localoptimizer::step() { - steps_++; - actuation act; - act.delta_threads=threads_param_; + steps_++; + actuation act; + act.delta_threads = threads_param_; #ifdef ALLSCALE_HAVE_CPUFREQ - act.frequency_idx=frequency_param_; + act.frequency_idx = frequency_param_; #endif - /* random optimization step */ - if (optmethod_ == random) - { - 
act.delta_threads = (rand() % max_threads_); + /* random optimization step */ + if (optmethod_ == random) + { + act.delta_threads = (rand() % max_threads_); #ifdef ALLSCALE_HAVE_CPUFREQ - act.frequency_idx = rand() % frequencies_param_allowed_.size(); - // if (act.frequency_idx == frequency_param_) - // act.frequency_idx = -1; + act.frequency_idx = rand() % frequencies_param_allowed_.size(); + // if (act.frequency_idx == frequency_param_) + // act.frequency_idx = -1; #endif - } + } - else if (optmethod_ == allscale) - { - if (current_objective_idx_ > objectives_.size()) - goto validate_act; + else if (optmethod_ == allscale) + { + if (current_objective_idx_ > objectives_.size()) + goto validate_act; - if (steps_ < warmup_steps_) - { + if (steps_ < warmup_steps_) + { #ifdef DEBUG_MULTIOBJECTIVE_ - std::cout << "[LOCALOPTIMIZER|INFO] Optimizer No-OP: either at warm-up or optimizer has completed\n"; + std::cout << "[LOCALOPTIMIZER|INFO] Optimizer No-OP: either at warm-up or optimizer has completed\n"; #endif - // set some random parametrization to collect at least 3 different - // vertices to be used as input to the optimizer - act.delta_threads = rand() % max_threads_; + // set some random parametrization to collect at least 3 different + // vertices to be used as input to the optimizer + +#if 1 + float bucket_dt = steps_ / (float)warmup_steps_; + float _min_threads = max_threads_ * bucket_dt; + + act.delta_threads = rand() % (int)ceil(bucket_dt) + roundf(_min_threads); #ifdef ALLSCALE_HAVE_CPUFREQ - act.frequency_idx = rand() % frequencies_param_allowed_.size(); + float _min_freqs = frequencies_param_allowed_.size() * bucket_dt; + act.frequency_idx = rand() % (int)ceil(bucket_dt) + roundf(_min_freqs); #endif - goto validate_act; - } +#endif + goto validate_act; + } - // iterate over all objectives in decreasing priority - objective obj = objectives_[current_objective_idx_]; + // iterate over all objectives in decreasing priority + objective obj = 
objectives_[current_objective_idx_]; - // initialize optimizer for this objective, if not already done so - if (!obj.initialized) - { + // initialize optimizer for this objective, if not already done so + if (!obj.initialized) + { #ifdef DEBUG_MULTIOBJECTIVE_ - std::cout << "[LOCALOPTIMIZER|INFO] Initializing optimizer for new objective\n"; - std::cout << "[LOCALOPTIMIZER|DEBUG] Samples: " << std::flush; - for (auto& sam: obj.samples) - { - std::cout << sam << "," << std::flush; - } - std::cout << "\n" << std::flush; - - std::cout << "[LOCALOPTIMIZER|DEBUG] Thread Param of Samples: " << std::flush; - for (auto& sam: obj.threads_samples) - { - std::cout << sam << "," << std::flush; - } - std::cout << "\n" << std::flush; + std::cout << "[LOCALOPTIMIZER|INFO] Initializing optimizer for new objective\n"; + std::cout << "[LOCALOPTIMIZER|DEBUG] Samples: " << std::flush; + for (auto &sam : obj.samples) + { + std::cout << sam << "," << std::flush; + } + std::cout << "\n" + << std::flush; + + std::cout << "[LOCALOPTIMIZER|DEBUG] Thread Param of Samples: " << std::flush; + for (auto &sam : obj.threads_samples) + { + std::cout << sam << "," << std::flush; + } + std::cout << "\n" + << std::flush; #ifdef ALLSCALE_HAVE_CPUFREQ - std::cout << "[LOCALOPTIMIZER|DEBUG] Freq Param of Samples: " << std::flush; - for (auto& sam: obj.freq_samples){ - std::cout << sam << "," << std::flush; - } - std::cout << "\n" << std::flush; + std::cout << "[LOCALOPTIMIZER|DEBUG] Freq Param of Samples: " << std::flush; + for (auto &sam : obj.freq_samples) + { + std::cout << sam << "," << std::flush; + } + std::cout << "\n" + << std::flush; #endif #endif - int samplenr = obj.samples.size(); + int samplenr = obj.samples.size(); #ifdef ALLSCALE_HAVE_CPUFREQ - double params[3][2]={ - {obj.threads_samples[samplenr-1],obj.freq_samples[samplenr-1]}, - {obj.threads_samples[samplenr-2],obj.freq_samples[samplenr-2]}, - {obj.threads_samples[samplenr-3],obj.freq_samples[samplenr-3]}, - }; - double 
values[3]={obj.samples[samplenr-1],obj.samples[samplenr-2],obj.samples[samplenr-3]}; - - double constraint_min[]={1,0}; - double constraint_max[]={(double)max_threads_, - (double)frequencies_param_allowed_.size()}; - std::cout << "initialize_simplex::Initializing with " << frequencies_param_allowed_.size() << " frequencies" << std::endl; - nmd.initialize_simplex(params,values,constraint_min,constraint_max); - objectives_[current_objective_idx_].initialized=true; + double params[3][2] = { + {obj.threads_samples[samplenr - 1], obj.freq_samples[samplenr - 1]}, + {obj.threads_samples[samplenr - 2], obj.freq_samples[samplenr - 2]}, + {obj.threads_samples[samplenr - 3], obj.freq_samples[samplenr - 3]}, + }; + double values[3] = {obj.samples[samplenr - 1], obj.samples[samplenr - 2], obj.samples[samplenr - 3]}; + double min_threads = round(max_threads_ * 0.25); + + if (min_threads < 1.0) + min_threads = 1.0; + + double constraint_min[] = {min_threads, 0}; + double constraint_max[] = {(double)max_threads_, + (double)frequencies_param_allowed_.size() - 1}; + std::cout << "initialize_simplex::Initializing with " << frequencies_param_allowed_.size() << " frequencies" << std::endl; + nmd.initialize_simplex(params, values, constraint_min, constraint_max); + objectives_[current_objective_idx_].initialized = true; #endif - } + } #ifdef DEBUG_MULTIOBJECTIVE_ - std::cout << "[LOCALOPTIMIZER|DEBG] Current Optimized Objective ="; - switch (obj.type) - { - case energy: - std::cout << "********** Energy\n" << std::flush; - break; - case time: - std::cout << "&&&&&&&&&& Time\n" << std::flush; - break; - case resource: - std::cout << "oooooooooo Resource\n" << std::flush; - break; - } - std::cout << "[LOCALOPTIMIZER|DEBUG] Samples: " << std::flush; - for (auto& sam: obj.samples) - { - std::cout << sam << "," << std::flush; - } - std::cout << "\n" << std::flush; + std::cout << "[LOCALOPTIMIZER|DEBG] Current Optimized Objective ="; + switch (obj.type) + { + case energy: + std::cout << 
"********** Energy\n" + << std::flush; + break; + case time: + std::cout << "&&&&&&&&&& Time\n" + << std::flush; + break; + case resource: + std::cout << "oooooooooo Resource\n" + << std::flush; + break; + } + std::cout << "[LOCALOPTIMIZER|DEBUG] Samples: " << std::flush; + for (auto &sam : obj.samples) + { + std::cout << sam << "," << std::flush; + } + std::cout << "\n" + << std::flush; - std::cout << "[LOCALOPTIMIZER|DEBUG] Freq Param of Samples: " << std::flush; + std::cout << "[LOCALOPTIMIZER|DEBUG] Freq Param of Samples: " << std::flush; #ifdef ALLSCALE_HAVE_CPUFREQ - for (auto& sam: obj.freq_samples) - { - std::cout << sam << "," << std::flush; - } - std::cout << "\n" << std::flush; + for (auto &sam : obj.freq_samples) + { + std::cout << sam << "," << std::flush; + } + std::cout << "\n" + << std::flush; #endif #endif - optstepresult nmd_res = nmd.step(obj.samples[0]); + optstepresult nmd_res = nmd.step(obj.samples[0]); #ifdef DEBUG_MULTIOBJECTIVE_ - std::cout << "[LOCALOPTIMIZER|DEBUG] Calling NMD Optimizer Step, Param = \n"; - std::cout << "[LOCALOPTIMIZER|DEBUG] New Vertex to try: "; - std::cout << "Threads = " << nmd_res.threads; + std::cout << "[LOCALOPTIMIZER|DEBUG] Calling NMD Optimizer Step, Param = \n"; + std::cout << "[LOCALOPTIMIZER|DEBUG] New Vertex to try: "; + std::cout << "Threads = " << nmd_res.threads; +#ifdef ALLSCALE_HAVE_CPUFREQ + std::cout << " Freq Idx = " << nmd_res.freq_idx << std::endl; +#endif + std::cout << "Converg Thresh = " << convergence_threshold_ << std::endl; +#endif + if (nmd_res.converged) + { + objectives_[current_objective_idx_].converged = true; + objectives_[current_objective_idx_].converged_minimum = nmd.getMinObjective(); + double *minimization_point = nmd.getMinVertices(); + objectives_[current_objective_idx_].minimization_params[0] = + minimization_point[0]; + objectives_[current_objective_idx_].minimization_params[1] = + minimization_point[1]; +#ifdef DEBUG_CONVERGENCE_ + std::cout << "[LOCALOPTIMIZER|INFO] NMD 
convergence\n"; + std::cout << "******************************************" << std::endl; + std::cout << "[LOCALOPTIMIZER|INFO] Minimal Objective Value = " << objectives_[current_objective_idx_].converged_minimum << "Threads = " << minimization_point[0] << "Freq_idx = " << minimization_point[1] << std::endl; + std::cout << "******************************************" << std::endl; +#endif + act.delta_threads = minimization_point[0]; #ifdef ALLSCALE_HAVE_CPUFREQ - std::cout << " Freq Idx = " << nmd_res.freq_idx << std::endl; + act.frequency_idx = minimization_point[1]; #endif - std::cout << "Converg Thresh = " << convergence_threshold_ << std::endl; + current_objective_idx_++; + if (current_objective_idx_ == objectives_.size()) + { + converged_ = true; +#ifdef DEBUG_CONVERGENCE_ + std::cout << "[LOCALOPTIMIZER|INFO] ALL OBJECTIVES HAVE CONVERGED " << std::endl; #endif - if (nmd_res.converged) + } + } + else + { +#if 0 + // if a higher priority objective starts getting off leeway margin, + // decide convergence of the current param at this parameter point + if (current_objective_idx_ > 0) + for (int i = 0; i < current_objective_idx_; i++) { + objective priority_obj = objectives_[i]; + double max_leeway_value = priority_obj.converged_minimum + + priority_obj.leeway * (priority_obj.globalmax - priority_obj.converged_minimum); + if (priority_obj.samples[0] > max_leeway_value && + priority_obj.samples[1] > max_leeway_value) + { objectives_[current_objective_idx_].converged = true; objectives_[current_objective_idx_].converged_minimum = nmd.getMinObjective(); - double* minimization_point = nmd.getMinVertices(); - objectives_[current_objective_idx_].minimization_params[0]= + double *minimization_point = nmd.getMinVertices(); + objectives_[current_objective_idx_].minimization_params[0] = minimization_point[0]; - objectives_[current_objective_idx_].minimization_params[1]= + objectives_[current_objective_idx_].minimization_params[1] = minimization_point[1]; + #ifdef 
DEBUG_CONVERGENCE_ - std::cout << "[LOCALOPTIMIZER|INFO] NMD convergence\n"; + std::cout << "[LOCALOPTIMIZER|INFO] Leeway convergence\n"; std::cout << "******************************************" << std::endl; - std::cout << "[LOCALOPTIMIZER|INFO] Minimal Objective Value = " << - objectives_[current_objective_idx_].converged_minimum << - "Threads = " << minimization_point[0] << "Freq_idx = " << minimization_point[1] << - std::endl; + std::cout << "[LOCALOPTIMIZER|INFO] Minimal Objective Value = " << objectives_[current_objective_idx_].converged_minimum << "Threads = " << minimization_point[0] << "Freq_idx = " << minimization_point[1] << std::endl; std::cout << "******************************************" << std::endl; #endif - act.delta_threads=minimization_point[0]; + // find the parameter point that scores the leeway margin value + act.delta_threads = (int)priority_obj.minimization_params[0] * + (max_leeway_value / priority_obj.converged_minimum); #ifdef ALLSCALE_HAVE_CPUFREQ - act.frequency_idx=minimization_point[1]; + act.frequency_idx = (int)priority_obj.minimization_params[1] * + (max_leeway_value / priority_obj.converged_minimum); #endif current_objective_idx_++; if (current_objective_idx_ == objectives_.size()) { - converged_=true; + converged_ = true; #ifdef DEBUG_CONVERGENCE_ - std::cout << "[LOCALOPTIMIZER|INFO] ALL OBJECTIVES HAVE CONVERGED " << std::endl; + std::cout << "[LOCALOPTIMIZER|INFO] ALL OBJECTIVES HAVE CONVERGED " << std::endl; #endif } - } - else - { - // if a higher priority objective starts getting off leeway margin, - // decide convergence of the current param at this parameter point - if (current_objective_idx_>0) - for (int i=0;i max_leeway_value && - priority_obj.samples[1] > max_leeway_value) - { - objectives_[current_objective_idx_].converged = true; - objectives_[current_objective_idx_].converged_minimum = nmd.getMinObjective(); - double* minimization_point = nmd.getMinVertices(); - 
objectives_[current_objective_idx_].minimization_params[0]= - minimization_point[0]; - objectives_[current_objective_idx_].minimization_params[1]= - minimization_point[1]; - -#ifdef DEBUG_CONVERGENCE_ - std::cout << "[LOCALOPTIMIZER|INFO] Leeway convergence\n"; - std::cout << "******************************************" << std::endl; - std::cout << "[LOCALOPTIMIZER|INFO] Minimal Objective Value = " << - objectives_[current_objective_idx_].converged_minimum << - "Threads = " << minimization_point[0] << "Freq_idx = " << minimization_point[1] << - std::endl; - std::cout << "******************************************" << std::endl; -#endif - // find the parameter point that scores the leeway margin value - act.delta_threads = (int)priority_obj.minimization_params[0]* - (max_leeway_value/priority_obj.converged_minimum); -#ifdef ALLSCALE_HAVE_CPUFREQ - act.frequency_idx = (int)priority_obj.minimization_params[1]* - (max_leeway_value/priority_obj.converged_minimum); -#endif - current_objective_idx_++; - if (current_objective_idx_ == objectives_.size()) - { - converged_=true; -#ifdef DEBUG_CONVERGENCE_ - std::cout << "[LOCALOPTIMIZER|INFO] ALL OBJECTIVES HAVE CONVERGED " << std::endl; -#endif - } - act.delta_threads=(nmd_res.threads==0)?getCurrentThreads():nmd_res.threads; + act.delta_threads = (nmd_res.threads == 0) ? 
getCurrentThreads() : nmd_res.threads; #ifdef ALLSCALE_HAVE_CPUFREQ - act.frequency_idx=nmd_res.freq_idx; + act.frequency_idx = nmd_res.freq_idx; #endif - goto validate_act; - } - } + goto validate_act; + } } +#else + act.delta_threads = nmd_res.threads; + act.frequency_idx = nmd_res.freq_idx; +#endif } - validate_act: + } +validate_act: - if ( act.delta_threads > max_threads_) { - act.delta_threads = max_threads_; - } else if ( act.delta_threads < 1 ) { - act.delta_threads = getCurrentThreads(); - } + if (act.delta_threads > max_threads_) + { + act.delta_threads = max_threads_; + } + else if (act.delta_threads < 1) + { + act.delta_threads = getCurrentThreads(); + } #ifdef ALLSCALE_HAVE_CPUFREQ - // VV: If freq_idx is -1 then set it to last used frequency (frequency_param_) - if ( act.frequency_idx < 0) - act.frequency_idx= frequency_param_; + // VV: If freq_idx is -1 then set it to last used frequency (frequency_param_) + if (act.frequency_idx < 0) + act.frequency_idx = frequency_param_; + else if (act.frequency_idx > frequencies_param_allowed_.size() - 1) + { + act.frequency_idx = frequencies_param_allowed_.size() - 1; + } #endif - return act; -} -} + return act; } +} // namespace components +} // namespace allscale diff --git a/src/components/nmsimplex_bbincr.cpp b/src/components/nmsimplex_bbincr.cpp index a0c964e..b96664a 100644 --- a/src/components/nmsimplex_bbincr.cpp +++ b/src/components/nmsimplex_bbincr.cpp @@ -18,451 +18,507 @@ vector constraint_max[0]){ - x[0] = (constraint_min[0] + constraint_max[0])/2; - } - - if (x[1] < constraint_min[1] || x[1] > constraint_max[1]){ - x[1] = (constraint_min[1] + constraint_max[1])/2; - } - - x[0]=round(x[0]); - x[1]=round(x[1]); + // round to integer and bring again with allowable margins + // todo fix: generalize + + // if (x[0] < constraint_min[0] || x[0] > constraint_max[0]){ + // x[0] = (constraint_min[0] + constraint_max[0])/2; + // } + + // if (x[1] < constraint_min[1] || x[1] > constraint_max[1]){ + // x[1] = 
(constraint_min[1] + constraint_max[1])/2; + // } + + for (auto i = 0u; i < 2u; ++i) + { + if (x[i] < constraint_min[i]) + x[i] = constraint_min[i]; + else if (x[i] > constraint_max[i]) + x[i] = constraint_max[i]; + } + + x[0] = round(x[0]); + x[1] = round(x[1]); } /* FIXME: generalize */ -void NelderMead::initialize_simplex(double params[][2], double values[], double constraint_min[],double constraint_max[]) +void NelderMead::initialize_simplex(double params[][2], double values[], double constraint_min[], double constraint_max[]) { - int i,j; + int i, j; - for (i=0;i<=n;i++) { - for (j=0;jconstraint_min[i] = constraint_min[i]; + this->constraint_max[i] = constraint_max[i]; } - f[i]=values[i]; - this->constraint_min[i]=constraint_min[i]; - this->constraint_max[i]=constraint_max[i]; - } - itr=0; -} + itr = 0; + state_ = start; +} /* print out the initial values */ void NelderMead::print_initial_simplex() { - int i,j; - std::cout << "[NelderMead DEBUG] Initial Values\n"; - for (j=0;j<=n;j++) { - for (i=0;i= f[vs]) { // f(R)>f(B) - for (j=0;j<=n-1;j++) { // replace W with R and end iteration - v[vg][j] = vr[j]; - } - f[vg] = fr; - updateObjectives(); - state_=start; - break; + fr = param; + + std::cout << "fr:" << fr << " f[vh]:" << f[vh] + << " f[vs]:" << f[vs] << std::endl; + + if ( (f[vs] <= fr) && (fr < f[vh]) ) { + // VV: REFLECTED point is better than the SECOND BEST + // but NOT better than the BEST + // Replace WORST point with REFLECTED + for (j = 0; j <= n - 1; j++) + { + v[vg][j] = vr[j]; } + f[vg] = fr; + state_ = start; + return do_step_start(param); + } else if ( fr < f[vs] ) { + // VV: REFLECTED is better than BEST + + for ( j=0; j<=n-1; ++j) + ve[j] = vm[j] + GAMMA * (vr[j] - vm[j]); + + my_constraints(ve); + // VV: Now evaluate EXPANDED + res.threads = ve[0]; + res.freq_idx = ve[1]; + + state_ = expansion; + + return res; + } else if ( (f[vh] <= fr) && (fr < f[vg])) { + // VV: REFLECTED between SECOND BEST and WORST + + for ( j=0; j<=n-1; ++j) + vc[j] 
= vm[j] + BETA * (vr[j] - vm[j]); + + my_constraints(vc); + + // VV: Now evaluate EXPANDED + res.threads = vc[0]; + res.freq_idx = vc[1]; + + state_ = contraction; + + return res; + } else { + // VV: REFLECTED worse than WORST + for ( j=0; j<=n-1; ++j) + vc[j] = vm[j] - BETA * (vr[j] - vm[j]); + + my_constraints(vc); + + // VV: Now evaluate EXPANDED + res.threads = vc[0]; + res.freq_idx = vc[1]; + + state_ = contraction; + + return res; + } +} - /* investigate a step further through expansion in this direction */ - else{ - for (j=0;j<=n-1;j++) { - /*ve[j] = GAMMA*vr[j] + (1-GAMMA)*vm[j];*/ - ve[j] = vm[j]+GAMMA*(vr[j]-vm[j]); - } +optstepresult NelderMead::do_step_expand(double param) +{ #ifdef NMD_DEBUG_ - std::cout << "[NelderMead DEBUG] Expansion Parameter = (" - << ve[0] << "," << ve[1] << ")" - << std::endl; + std::cout << "[NelderMead DEBUG] State = Expansion" << std::endl; #endif - my_constraints(ve); - // enter the state waiting for a sampled value of the objective function - // at the expansion vertex - state_=expansion; - res.threads=ve[0]; - res.freq_idx=ve[1]; - - break; + fe = param; + + if (fe < fr) + { + // VV: EXPANDED point is better than REFLECTIVE + // Replace WORST with EXPANDED + for (j = 0; j <= n - 1; j++) + { + v[vg][j] = ve[j]; } - - }else{ // f(R) > f(G) - Case (ii) - if (fr < f[vg]) { // f(R) < f(W) - for (j=0;j<=n-1;j++) { // replace W with R - v[vg][j] = vr[j]; - } - f[vg] = fr; + f[vg] = fe; + } + else + { + // VV: Replace WORST with REFLECTED + for (j = 0; j <= n - 1; j++) + { + v[vg][j] = vr[j]; } + f[vg] = fr; + } - if (fr < f[vg] && fr >= f[vh]) { - /* perform outside contraction */ - for (j=0;j<=n-1;j++) { - /*vc[j] = BETA*v[vg][j] + (1-BETA)*vm[j];*/ - vc[j] = vm[j]+BETA*(vr[j]-vm[j]); - } + state_ = start; + + return do_step_start(param); +} + +optstepresult NelderMead::do_step_contract(double param) +{ + int j; #ifdef NMD_DEBUG_ - std::cout << "[NelderMead DEBUG] Contraction Parameter = (" - << vc[0] << "," << vc[1] << ")" - << 
std::endl; + std::cout << "[NelderMead|DEBUG] State = Contraction" << std::endl; #endif - my_constraints(vc); - // enter the state waiting for a sampled value of the objective function - // at the outside contraction vertex - state_=contraction; - res.threads=vc[0]; - res.freq_idx=vc[1]; - break; - } else { - /* perform inside contraction */ - for (j=0;j<=n-1;j++) { - /*vc[j] = BETA*v[vg][j] + (1-BETA)*vm[j];*/ - vc[j] = vm[j]-BETA*(vm[j]-v[vg][j]); - } + fc = param; + + if ( fc <= fr ) { + // VV: CONTRACTED_O is better than REFLECTED + // Replace WORST with CONTRACTED_O + for (j = 0; j <= n - 1; j++) + { + v[vg][j] = vc[j]; + } + f[vg] = fc; + + return do_step_start(param); + } else { + // VV: Replace SECOND BEST + for (j = 0; j <= n - 1; j++) + v[vh][j] = v[vs][j] + DELTA * (v[vh][j] - v[vs][j]); + + my_constraints(v[vh]); + // VV: Now evaluate SHRINK + + optstepresult res; + res.threads = v[vh][0]; + res.freq_idx = v[vh][1]; + state_ = shrink; + return res; + } +} + +optstepresult NelderMead::do_step_shrink(double param) +{ #ifdef NMD_DEBUG_ - std::cout << "[NelderMead DEBUG] Contraction Parameter = (" - << vc[0] << "," << vc[1] << ")" - << std::endl; + std::cout << "[NelderMead|DEBUG] State = Shrink" << std::endl; #endif - my_constraints(vc); - state_=contraction; - res.threads=vc[0]; - res.freq_idx=vc[1]; - break; - } + f[vh] = param; + return do_step_start(param); +} + +optstepresult NelderMead::step(double param) +{ + int i, j; + optstepresult res; + res.threads = 0; + res.freq_idx = -1; - /** EXPANSION **/ + switch (state_) + { - /** This state is entered when we have received a sample of the objective - ** function at the expansion vertex - **/ + case start: + res = do_step_start(param); + break; + case reflection: + res = do_step_reflect(param); + break; case expansion: -#ifdef NMD_DEBUG_ - std::cout << "[NelderMead DEBUG] State = Expansion" << std::endl; -#endif - fe=param; - //fe = objfunc(ve); - if (fe < f[vs]) { // if f(E)= EPSILON && itr <= 
MAXITERATIONS) - return false; - else{ - vs = vs_index(); - min=f[vs]; - return true; - } + if (s >= EPSILON && itr <= MAXITERATIONS) + return false; + else + { + vs = vs_index(); + min = f[vs]; + return true; + } } -void NelderMead::updateObjectives(){ - /* re-evaluate all the vertices */ - /*for (j=0;j<=n;j++) { - f[j] = objfunc(v[j]); - } - */ +void NelderMead::updateObjectives() +{ + /* re-evaluate all the vertices */ + /*for (j=0;j<=n;j++) { + f[j] = objfunc(v[j]); + } + */ - /* find the index of the largest value */ - vg = vg_index(); + /* find the index of the largest value */ + vg = vg_index(); - /* find the index of the smallest value */ - vs = vs_index(); + /* find the index of the smallest value */ + vs = vs_index(); - /* find the index of the second largest value */ - vh = vh_index(); + /* find the index of the second largest value */ + vh = vh_index(); - my_constraints(v[vg]); + my_constraints(v[vg]); - //f[vg] = objfunc(v[vg]); + //f[vg] = objfunc(v[vg]); - my_constraints(v[vh]); + my_constraints(v[vh]); - //f[vh] = objfunc(v[vh]); + //f[vh] = objfunc(v[vh]); } -} -} +} // namespace components +} // namespace allscale /* -std::vector NelderMead::minimum(){ - - - free(f); - free(vr); - free(ve); - free(vc); - free(vm); - for (i=0;i<=n;i++) { - free (v[i]); - } - free(v); - return min; - - -} -*/ + std::vector NelderMead::minimum(){ + free(f); + free(vr); + free(ve); + free(vc); + free(vm); + for (i=0;i<=n;i++) { + free (v[i]); + } + free(v); + return min; + } + */ diff --git a/src/components/scheduler_component.cpp b/src/components/scheduler_component.cpp index 678d539..e2b7df9 100644 --- a/src/components/scheduler_component.cpp +++ b/src/components/scheduler_component.cpp @@ -490,8 +490,27 @@ void scheduler::init() { lopt_.reset(os_thread_count,0); #if defined(ALLSCALE_HAVE_CPUFREQ) using hardware_reconf = allscale::components::util::hardware_reconf; + auto freqs = hardware_reconf::get_frequencies(0); + + const std::size_t max_freqs = 5; + std::size_t 
keep_every = (std::size_t) ceilf(freqs.size() / (float) max_freqs); + + if ( keep_every > 1 ) { + std::vector new_freqs; + + int i, j, len; + + for (j=0, i=0, len=freqs.size(); i freq_temp = - lopt_.setfrequencies(hardware_reconf::get_frequencies(0)); + lopt_.setfrequencies(freqs); if (freq_temp.empty()){ HPX_THROW_EXCEPTION(hpx::bad_request, "scheduler::init", "error in initializing the local optimizer, allowed frequency values are empty"); diff --git a/src/components/util/hardware_reconf.cpp b/src/components/util/hardware_reconf.cpp index 4cf1491..b515977 100644 --- a/src/components/util/hardware_reconf.cpp +++ b/src/components/util/hardware_reconf.cpp @@ -5,6 +5,7 @@ #include #include #include +#include // std::sort #include @@ -25,6 +26,7 @@ namespace allscale { namespace components { namespace util { if (available_frequencies != nullptr) cpufreq_put_available_frequencies(available_frequencies); + std::sort(frequencies.begin(), frequencies.end()); return frequencies; } From 9fafe64bd58e380396e27b540ea4c6fdf0b4bdda Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Tue, 13 Nov 2018 16:47:47 +0000 Subject: [PATCH 05/37] Explore all objectives at the same time easier integration with dashboard --- allscale/components/localoptimizer.hpp | 41 +- allscale/components/nmsimplex_bbincr.hpp | 243 ++++---- allscale/components/scheduler.hpp | 6 +- src/components/localoptimizer.cpp | 664 ++++++++-------------- src/components/nmsimplex_bbincr.cpp | 690 +++++++++++++++++------ src/components/scheduler_component.cpp | 20 +- 6 files changed, 959 insertions(+), 705 deletions(-) diff --git a/allscale/components/localoptimizer.hpp b/allscale/components/localoptimizer.hpp index 1f7aae0..d9799cc 100644 --- a/allscale/components/localoptimizer.hpp +++ b/allscale/components/localoptimizer.hpp @@ -3,6 +3,7 @@ #define ALLSCALE_COMPONENTS_LOCALOPTIMIZER_HPP #include + #if defined(ALLSCALE_HAVE_CPUFREQ) #include #endif @@ -20,7 +21,7 @@ namespace allscale { namespace components { - 
enum objectiveType {time,energy,resource}; + enum objectiveType {time, energy, resource}; enum parameterType {thread, frequency}; @@ -28,6 +29,8 @@ namespace allscale { namespace components { /* structure type of a single optimization objective */ struct objective{ + double last_scores[3]; + objectiveType type; /* leeway threshold desired, 0-1 double */ double leeway; @@ -60,7 +63,7 @@ namespace allscale { namespace components { double minimization_params[2]; }; - + /* structure type modelling an optimization actuation action to be taken by the scheduler */ struct actuation{ @@ -81,14 +84,19 @@ namespace allscale { namespace components { { localoptimizer() :nmd(0.01), + pending_threads(0.), + pending_energy(0.), + pending_time(0.), + pending_num_times(0.), + mo_initialized(false), #if defined(ALLSCALE_HAVE_CPUFREQ) frequency_param_(0), #endif current_objective_idx_(0),converged_(false) { - if (optmethod_==random) - srand (std::time(NULL)); - } + if (optmethod_==random) + srand (std::time(NULL)); + } localoptimizer(std::list); @@ -99,10 +107,14 @@ namespace allscale { namespace components { #ifdef DEBUG_ std::cout << "Local Optimizer Initialized with " << policyToString(pol) - << " policy for single objective search." + << " policy for multi-objective search." 
<< std::endl; #endif } +#ifdef ALLSCALE_HAVE_CPUFREQ + void initialize_nmd(); +#endif + double opt_weights[NMD_NUM_OBJECTIVES]; searchPolicy getPolicy(){return optmethod_;} @@ -166,6 +178,23 @@ namespace allscale { namespace components { } private: + void accumulate_objective_measurements(); + void reset_accumulated_measurements(); + + std::vector samples_energy; + std::vector samples_time; + std::vector samples_threads; + std::vector samples_freq; + + bool explore_knob_domain; + + double initialization_samples[NMD_NUM_KNOBS+1][NMD_NUM_OBJECTIVES]; + double initialization_params[NMD_NUM_KNOBS+1][NMD_NUM_KNOBS]; + + double pending_time, pending_energy, pending_threads; + unsigned long pending_num_times; + + bool mo_initialized; /* vector of active optimization objectives. Objectives are stored in the vector in decreasing priority order */ diff --git a/allscale/components/nmsimplex_bbincr.hpp b/allscale/components/nmsimplex_bbincr.hpp index ea4f3bd..e87fe8c 100644 --- a/allscale/components/nmsimplex_bbincr.hpp +++ b/allscale/components/nmsimplex_bbincr.hpp @@ -18,146 +18,195 @@ #include #include +#include +#include +#include + #ifdef MACOSX #include #else #include #endif -namespace allscale { namespace components { +namespace allscale +{ +namespace components +{ + +// VV: threads, freq_idx +#define NMD_NUM_KNOBS 2 +// VV: time, energy/power, resources +#define NMD_NUM_OBJECTIVES 3 + +#define MAX_IT 1000 /* maximum number of iterations */ +#define ALPHA 1.0 /* reflection coefficient */ +#define BETA 0.5 /* contraction coefficient */ +#define GAMMA 2.0 /* expansion coefficient */ +#define DELTA 0.5 /* shrinking coefficient */ -#define MAX_IT 1000 /* maximum number of iterations */ -#define ALPHA 1.0 /* reflection coefficient */ -#define BETA 0.5 /* contraction coefficient */ -#define GAMMA 2.0 /* expansion coefficient */ -#define DELTA 0.5 /* shrinking coefficient */ +#define CACHE_EXPIRE_AFTER_MS 5000 /* structure type of a single optimization step return status */ 
-struct optstepresult{ - /* true if optimization has converged for the specified objective */ - bool converged; - /* number of threads for parameters to set for sampling */ - double threads; - /* index to frequency vector for freq parameter to set for sampling*/ - int freq_idx; +struct optstepresult +{ + /* true if optimization has converged for the specified objective */ + bool converged; + /* number of threads for parameters to set for sampling */ + double threads; + /* index to frequency vector for freq parameter to set for sampling*/ + int freq_idx; + + /******VV: Cache stuff******/ + double score; + double objectives[3]; // (time, energy, resource) + // VV: _cache_expires denotes dt (in ms) after _cache_timestamp + int64_t _cache_timestamp, _cache_expires_dt; }; +typedef std::map, optstepresult> MapCache_t; + /* enumeration encoding state that the incremental Nelder Mead optimizer is at */ -enum iterationstates {start, reflection, expansion, - contraction, shrink}; +enum iterationstates +{ + start, + reflection, + expansion, + contraction, + shrink +}; -class NelderMead { +class NelderMead +{ + +public: + NelderMead(double); + // VV: For the time being params = [threads, freq_idx] + // objectives = [time, energy/power, resources] + // weights = [ W_time, W_energy/power, W_resources ] + // constraint_min = [min_threads, min_freq_idx] + void initialize_simplex(double params[NMD_NUM_KNOBS+1][NMD_NUM_KNOBS], + double objectives[][NMD_NUM_OBJECTIVES], + double weights[NMD_NUM_OBJECTIVES], + double constraint_min[NMD_NUM_KNOBS], + double constraint_max[NMD_NUM_KNOBS]); + void print_initial_simplex(); + void print_iteration(); + + double *getMinVertices() + { + return v[vs]; + } + + double getMinObjective() + { + return min; + } + + unsigned long int getIterations() { return itr; } + double evaluate_score(const double objectives[], const double *weights) const; + void set_weights(double weights[]); + + optstepresult step(const double objectives[]); +private: + //VV: 
objective_type: { : optstepresult } + MapCache_t cache_; + + optstepresult do_step_start(); + optstepresult do_step_reflect(const double objectives[]); + optstepresult do_step_expand(const double objectives[]); + optstepresult do_step_contract(const double objectives[]); + optstepresult do_step_shrink(const double objectives[]); - public: - NelderMead(double); - void initialize_simplex(double params[][2], double*,double*,double*); - void print_initial_simplex(); - void print_iteration(); - optstepresult step(double param); - double* getMinVertices(){ - return v[vs]; - } + bool knob_set_exists(double knobs[2], int exclude); - double getMinObjective(){ - return min; - } + void sort_vertices(void); + void my_constraints(double *); + void centroid(); + bool testConvergence(); - unsigned long int getIterations(){return itr;} + // VV: Will return false if entry not in cache + bool cache_update(int threads, int freq_idx, + const double objectives[], + bool add_if_new); - private: + double round2(double num, int precision) + { + double rnum = 0.0; + int tnum; - optstepresult do_step_start(double param); - optstepresult do_step_reflect(double param); - optstepresult do_step_expand(double param); - optstepresult do_step_contract(double param); - optstepresult do_step_shrink(double param); + if (num == 0.0) + return num; - int vg_index(); - int vs_index(); - int vh_index(); - void sort_vertices(void); - void my_constraints(double*); - void centroid(); - bool testConvergence(); - void updateObjectives(); + rnum = num * pow(10, precision); + tnum = (int)(rnum < 0 ? rnum - 0.5 : rnum + 0.5); + rnum = tnum / pow(10, precision); - double round2(double num, int precision) - { - double rnum = 0.0; - int tnum; + return rnum; + } - if (num == 0.0) - return num; + /* vertex with smallest value */ + int vs; - rnum = num*pow(10,precision); - tnum = (int)(rnum < 0 ? 
rnum-0.5 : rnum + 0.5); - rnum = tnum/pow(10,precision); + /* vertex with next smallest value */ + int vh; - return rnum; - } + /* vertex with largest value */ + int vg; - /* vertex with smallest value */ - int vs; + int i, j, row; + + const int n = 2; - /* vertex with next smallest value */ - int vh; + /* track the number of function evaluations */ + int k; - /* vertex with largest value */ - int vg; - - int i,j,row; + /* track the number of iterations */ + int itr; - const int n=2; + /* holds vertices of simplex */ + double **v; - /* track the number of function evaluations */ - int k; + /* value of function at each vertex */ + double *f; - /* track the number of iterations */ - int itr; - - /* holds vertices of simplex */ - double **v; + /* value of function at reflection point */ + double fr; - /* value of function at each vertex */ - double *f; + /* value of function at expansion point */ + double fe; - /* value of function at reflection point */ - double fr; + /* value of function at contraction point */ + double fc; - /* value of function at expansion point */ - double fe; + /* reflection - coordinates */ + double *vr; - /* value of function at contraction point */ - double fc; + /* expansion - coordinates */ + double *ve; - /* reflection - coordinates */ - double *vr; + /* contraction - coordinates */ + double *vc; - /* expansion - coordinates */ - double *ve; + /* centroid - coordinates */ + double *vm; - /* contraction - coordinates */ - double *vc; + double min; - /* centroid - coordinates */ - double *vm; + double fsum, favg, s; - double min; - - double fsum,favg,s; + double EPSILON; - double EPSILON; + iterationstates state_; - iterationstates state_; + const int MAXITERATIONS = 15; - const int MAXITERATIONS = 15; - - double constraint_min[2]; + double constraint_min[2]; - double constraint_max[2]; + double constraint_max[2]; + double opt_weights[NMD_NUM_OBJECTIVES]; }; -} -} +} // namespace components +} // namespace allscale #endif diff --git 
a/allscale/components/scheduler.hpp b/allscale/components/scheduler.hpp index 7eed6e5..c508900 100644 --- a/allscale/components/scheduler.hpp +++ b/allscale/components/scheduler.hpp @@ -5,8 +5,10 @@ #include #include #include + #if defined(ALLSCALE_HAVE_CPUFREQ) #include +#else #endif #include @@ -108,7 +110,7 @@ namespace allscale { namespace components { long last_optimization_timestamp_; /* periodicity in milliseconds to invoke the optimizer */ - const long optimization_period_ms = 5; + const long optimization_period_ms = 5000; /* captures absolute timestamp of the last time optimization objective value have been measured (sampled) */ @@ -117,7 +119,7 @@ namespace allscale { namespace components { long last_objective_measurement_timestamp_; /* periodicity in milliseconds to invoke objective sampling */ - const long objective_measurement_period_ms = 1; + const long objective_measurement_period_ms = 1000; //extra masks to better handle suspending/resuming threads std::vector thread_pools_; diff --git a/src/components/localoptimizer.cpp b/src/components/localoptimizer.cpp index 593853b..04ef472 100644 --- a/src/components/localoptimizer.cpp +++ b/src/components/localoptimizer.cpp @@ -24,487 +24,315 @@ namespace allscale { namespace components { - localoptimizer::localoptimizer(std::list targetobjectives) - : objectives_((int)targetobjectives.size()), - nmd(convergence_threshold_), - param_changes_(0), - steps_(0), - current_param_(thread), - converged_(false) + : objectives_((int)targetobjectives.size()), + nmd(convergence_threshold_), + param_changes_(0), + steps_(0), + current_param_(thread), + converged_(false) { - for (objective o : targetobjectives) - { - //std::cout << o.type << "," << o.leeway << "," << o.priority << '\n'; - objectives_[o.priority] = o; - objectives_[o.priority].localmin = 10000; - objectives_[o.priority].globalmin = 10000; - objectives_[o.priority].localmax = 0.0; - objectives_[o.priority].globalmax = 0.0; - 
objectives_[o.priority].converged = false; - objectives_[o.priority].initialized = false; - objectives_[o.priority].min_params_idx = 0; - objectives_[o.priority].converged_minimum = 0; - } + for (objective o : targetobjectives) + { + //std::cout << o.type << "," << o.leeway << "," << o.priority << '\n'; + objectives_[o.priority] = o; + objectives_[o.priority].localmin = 10000; + objectives_[o.priority].globalmin = 10000; + objectives_[o.priority].localmax = 0.0; + objectives_[o.priority].globalmax = 0.0; + objectives_[o.priority].converged = false; + objectives_[o.priority].initialized = false; + objectives_[o.priority].min_params_idx = 0; + objectives_[o.priority].converged_minimum = 0; + } #ifdef ALLSCALE_HAVE_CPUFREQ - setCurrentFrequencyIdx(0); + setCurrentFrequencyIdx(0); #endif }; void localoptimizer::setobjectives(std::list targetobjectives) { - objectives_.clear(); - objectives_.resize((int)targetobjectives.size()); - for (objective o : targetobjectives) - { - //std::cout << o.type << "," << o.leeway << "," << o.priority << '\n'; - objectives_[o.priority] = o; - objectives_[o.priority].localmin = 10000; - objectives_[o.priority].globalmin = 10000; - objectives_[o.priority].localmax = 0.0; - objectives_[o.priority].globalmax = 0.0; - objectives_[o.priority].converged = false; - objectives_[o.priority].initialized = false; - objectives_[o.priority].min_params_idx = 0; - objectives_[o.priority].converged_minimum = 0; - } - steps_ = 0; - param_changes_ = 0; - current_param_ = thread; + objectives_.clear(); + objectives_.resize((int)targetobjectives.size()); + + explore_knob_domain = true; + + for (objective o : targetobjectives) + { + //std::cout << o.type << "," << o.leeway << "," << o.priority << '\n'; + objectives_[o.priority] = o; + objectives_[o.priority].localmin = 10000; + objectives_[o.priority].globalmin = 10000; + objectives_[o.priority].localmax = 0.0; + objectives_[o.priority].globalmax = 0.0; + objectives_[o.priority].converged = false; + 
objectives_[o.priority].initialized = false; + objectives_[o.priority].min_params_idx = 0; + objectives_[o.priority].converged_minimum = 0; + + opt_weights[o.type] = o.leeway; + } + steps_ = 0; + param_changes_ = 0; + current_param_ = thread; #ifdef ALLSCALE_HAVE_CPUFREQ - setCurrentFrequencyIdx(0); + setCurrentFrequencyIdx(0); #endif - converged_ = false; + converged_ = false; } void localoptimizer::reset(int threads, int freq_idx) { - threads_param_ = threads; - param_changes_ = 0; - thread_param_values_.clear(); + threads_param_ = threads; + param_changes_ = 0; + thread_param_values_.clear(); #ifdef ALLSCALE_HAVE_CPUFREQ - frequency_param_ = freq_idx; - frequency_param_values_.clear(); + frequency_param_ = freq_idx; + frequency_param_values_.clear(); #endif - current_objective_idx_ = 0; - steps_ = 0; - current_param_ = thread; - converged_ = false; + current_objective_idx_ = 0; + steps_ = 0; + current_param_ = thread; + converged_ = false; }; #ifdef DEBUG_ void localoptimizer::printobjectives() { - for (auto &el : objectives_) - { - std::cout << "Objective" - << "\t\t" - << "Priority" - << "\t\t" - << "Leeway" << std::endl; - switch (el.type) - { - case time: - std::cout << "Time" - << "\t\t" << el.priority << "\t\t" << el.leeway << std::endl; - break; - case energy: - std::cout << "Energy" - << "\t\t" << el.priority << "\t\t" << el.leeway << std::endl; - break; - case resource: - std::cout << "Resource" - << "\t\t" << el.priority << "\t\t" << el.leeway << std::endl; - break; - } - } + for (auto &el : objectives_) + { + std::cout << "Objective" + << "\t\t" + << "Priority" + << "\t\t" + << "Leeway" << std::endl; + switch (el.type) + { + case time: + std::cout << "Time" + << "\t\t" << el.priority << "\t\t" << el.leeway << std::endl; + break; + case energy: + std::cout << "Energy" + << "\t\t" << el.priority << "\t\t" << el.leeway << std::endl; + break; + case resource: + std::cout << "Resource" + << "\t\t" << el.priority << "\t\t" << el.leeway << std::endl; + 
break; + } + } } void localoptimizer::printverbosesteps(actuation act) { - static int last_frequency_idx = 0; - - std::cout << "[INFO]"; - if (optmethod_ == random) - std::cout << "Random "; - else if (optmethod_ == allscale) - { - std::cout << "Allscale "; - } - std::cout << "Scheduler Step: Setting OS Threads to " << threads_param_; + static int last_frequency_idx = 0; + + std::cout << "[INFO]"; + if (optmethod_ == random) + std::cout << "Random "; + else if (optmethod_ == allscale) + { + std::cout << "Allscale "; + } + std::cout << "Scheduler Step: Setting OS Threads to " << threads_param_; #ifdef ALLSCALE_HAVE_CPUFREQ - if (act.frequency_idx >= 0) - last_frequency_idx = act.frequency_idx; - std::cout << " , CPU Frequency to " << frequencies_param_allowed_[last_frequency_idx] - << std::endl; + if (act.frequency_idx >= 0) + last_frequency_idx = act.frequency_idx; + std::cout << " , CPU Frequency to " << frequencies_param_allowed_[last_frequency_idx] + << std::endl; #else - std::cout << std::endl; + std::cout << std::endl; #endif } #endif -void localoptimizer::measureObjective(double iter_time, double power, double threads) +void localoptimizer::accumulate_objective_measurements() { - std::cout << "Measuring objective: " - << iter_time << " " - << power << " " - << threads << std::endl; - - for (auto &el : objectives_) - { - switch (el.type) - { - case time: - el.samples.insert(el.samples.begin(), iter_time); - if (el.samples.size() > 1000) - el.samples.resize(500); - - el.threads_samples.insert(el.threads_samples.begin(), threads); - if (el.threads_samples.size() > 1000) - el.threads_samples.resize(500); + if (pending_num_times) + { + pending_time /= (double)pending_num_times; + pending_threads /= (double)pending_num_times; + pending_energy /= (double)pending_num_times; + pending_num_times = 0; + } +} #ifdef ALLSCALE_HAVE_CPUFREQ - el.freq_samples.insert(el.freq_samples.begin(), getCurrentFrequencyIdx()); - if (el.freq_samples.size() > 1000) - 
el.freq_samples.resize(500); -#endif +void localoptimizer::initialize_nmd() +{ + // VV: Retrieve measurements for last exploration + if ( steps_ == warmup_steps_ +1 ) + { + accumulate_objective_measurements(); - if (el.globalmin > iter_time) - { - el.globalmin = iter_time; - el.min_params_idx = param_changes_; - } - if (el.globalmax < iter_time) - el.globalmax = iter_time; -#ifdef DEBUG__ - std::cout << "Iteration Time Minimum: " << el.globalmin << std::endl; - std::cout << "Iteration Time Maximum: " << el.globalmax << std::endl; - std::cout << "Iteration Time Samples: "; - for (auto &samp : el.samples) - std::cout << samp << ","; - std::cout << std::endl; -#endif - break; - case energy: - el.samples.insert(el.samples.begin(), power); - if (el.samples.size() > 1000) - el.samples.resize(500); + initialization_samples[steps_ - 2][0] = pending_time; + initialization_samples[steps_ - 2][1] = pending_energy; + initialization_samples[steps_ - 2][2] = pending_threads; - el.threads_samples.insert(el.threads_samples.begin(), threads); - if (el.threads_samples.size() > 1000) - el.threads_samples.resize(500); + reset_accumulated_measurements(); -#ifdef ALLSCALE_HAVE_CPUFREQ - el.freq_samples.insert(el.freq_samples.begin(), getCurrentFrequencyIdx()); - if (el.freq_samples.size() > 1000) - el.freq_samples.resize(500); -#endif + initialization_params[steps_ - 2][1] = getCurrentFrequencyIdx(); + } + + // VV: Place reasonable limits to #threads and cpu_freq tunable knobs + double min_threads = round(max_threads_ * 0.25); - if (el.globalmin > power) - { - el.globalmin = power; - el.min_params_idx = param_changes_; - } - if (el.globalmax < power) - el.globalmax = power; -#ifdef DEBUG__ - std::cout << "Power Consumption Minimum: " << el.globalmin << std::endl; - std::cout << "Power Consumption Maximum: " << el.globalmax << std::endl; - std::cout << "Power Consumption Samples: "; - for (auto &samp : el.samples) - std::cout << samp << ","; - std::cout << std::endl; -#endif - break; - 
case resource: - el.samples.insert(el.samples.begin(), threads); - if (el.samples.size() > 1000) - el.samples.resize(500); + if (min_threads < 1.0) + min_threads = 1.0; - el.threads_samples.insert(el.threads_samples.begin(), threads); - if (el.threads_samples.size() > 1000) - el.threads_samples.resize(500); + double constraint_min[] = {min_threads, 0}; + double constraint_max[] = {(double)max_threads_, + (double)frequencies_param_allowed_.size() - 1}; -#ifdef ALLSCALE_HAVE_CPUFREQ - el.freq_samples.insert(el.freq_samples.begin(), getCurrentFrequencyIdx()); - if (el.freq_samples.size() > 1000) - el.freq_samples.resize(500); -#endif + nmd.initialize_simplex(initialization_params, + initialization_samples, + opt_weights, + constraint_min, constraint_max); - if (el.globalmin > threads) - { - el.globalmin = threads; - el.min_params_idx = param_changes_; - } - if (el.globalmax < threads) - el.globalmax = threads; -#ifdef DEBUG__ - std::cout << "Threads Minimum: " << el.globalmin << std::endl; - std::cout << "Threads Maximum: " << el.globalmax << std::endl; - std::cout << "Threads Samples: "; - for (auto &samp : el.samples) - std::cout << samp << ","; - std::cout << std::endl; -#endif - break; - } - } + mo_initialized = true; + explore_knob_domain = true; } - -actuation localoptimizer::step() -{ - steps_++; - actuation act; - act.delta_threads = threads_param_; -#ifdef ALLSCALE_HAVE_CPUFREQ - act.frequency_idx = frequency_param_; -#endif - /* random optimization step */ - if (optmethod_ == random) - { - act.delta_threads = (rand() % max_threads_); -#ifdef ALLSCALE_HAVE_CPUFREQ - act.frequency_idx = rand() % frequencies_param_allowed_.size(); - // if (act.frequency_idx == frequency_param_) - // act.frequency_idx = -1; #endif - } - - else if (optmethod_ == allscale) - { - if (current_objective_idx_ > objectives_.size()) - goto validate_act; - if (steps_ < warmup_steps_) - { +void localoptimizer::measureObjective(double iter_time, double power, double threads) +{ + std::cout 
<< "Measuring objective: " + << iter_time << " " + << power << " " + << threads << std::endl; + + if (steps_) + { + pending_time += iter_time; + pending_energy += power; + pending_threads += threads; + pending_num_times++; + } +} -#ifdef DEBUG_MULTIOBJECTIVE_ - std::cout << "[LOCALOPTIMIZER|INFO] Optimizer No-OP: either at warm-up or optimizer has completed\n"; -#endif - // set some random parametrization to collect at least 3 different - // vertices to be used as input to the optimizer +void localoptimizer::reset_accumulated_measurements() +{ + pending_time = 0.; + pending_energy = 0.; + pending_threads = 0.; + pending_num_times = 0; +} -#if 1 - float bucket_dt = steps_ / (float)warmup_steps_; - float _min_threads = max_threads_ * bucket_dt; +actuation localoptimizer::step() +{ - act.delta_threads = rand() % (int)ceil(bucket_dt) + roundf(_min_threads); + steps_++; + actuation act; + act.delta_threads = threads_param_; #ifdef ALLSCALE_HAVE_CPUFREQ - float _min_freqs = frequencies_param_allowed_.size() * bucket_dt; - act.frequency_idx = rand() % (int)ceil(bucket_dt) + roundf(_min_freqs); -#endif + act.frequency_idx = frequency_param_; #endif - goto validate_act; - } - - // iterate over all objectives in decreasing priority - objective obj = objectives_[current_objective_idx_]; - - // initialize optimizer for this objective, if not already done so - if (!obj.initialized) - { -#ifdef DEBUG_MULTIOBJECTIVE_ - std::cout << "[LOCALOPTIMIZER|INFO] Initializing optimizer for new objective\n"; - std::cout << "[LOCALOPTIMIZER|DEBUG] Samples: " << std::flush; - for (auto &sam : obj.samples) - { - std::cout << sam << "," << std::flush; - } - std::cout << "\n" - << std::flush; - - std::cout << "[LOCALOPTIMIZER|DEBUG] Thread Param of Samples: " << std::flush; - for (auto &sam : obj.threads_samples) - { - std::cout << sam << "," << std::flush; - } - std::cout << "\n" - << std::flush; - + /* random optimization step */ + if (optmethod_ == random) + { + act.delta_threads = (rand() % 
max_threads_); #ifdef ALLSCALE_HAVE_CPUFREQ - std::cout << "[LOCALOPTIMIZER|DEBUG] Freq Param of Samples: " << std::flush; - for (auto &sam : obj.freq_samples) - { - std::cout << sam << "," << std::flush; - } - std::cout << "\n" - << std::flush; + act.frequency_idx = rand() % frequencies_param_allowed_.size(); + // if (act.frequency_idx == frequency_param_) + // act.frequency_idx = -1; #endif -#endif - int samplenr = obj.samples.size(); + } #ifdef ALLSCALE_HAVE_CPUFREQ - double params[3][2] = { - {obj.threads_samples[samplenr - 1], obj.freq_samples[samplenr - 1]}, - {obj.threads_samples[samplenr - 2], obj.freq_samples[samplenr - 2]}, - {obj.threads_samples[samplenr - 3], obj.freq_samples[samplenr - 3]}, - }; - double values[3] = {obj.samples[samplenr - 1], obj.samples[samplenr - 2], obj.samples[samplenr - 3]}; - double min_threads = round(max_threads_ * 0.25); - - if (min_threads < 1.0) - min_threads = 1.0; - - double constraint_min[] = {min_threads, 0}; - double constraint_max[] = {(double)max_threads_, - (double)frequencies_param_allowed_.size() - 1}; - std::cout << "initialize_simplex::Initializing with " << frequencies_param_allowed_.size() << " frequencies" << std::endl; - nmd.initialize_simplex(params, values, constraint_min, constraint_max); - objectives_[current_objective_idx_].initialized = true; -#endif - } - + else if (optmethod_ == allscale) + { + if (steps_ <= warmup_steps_) + { #ifdef DEBUG_MULTIOBJECTIVE_ - std::cout << "[LOCALOPTIMIZER|DEBG] Current Optimized Objective ="; - switch (obj.type) - { - case energy: - std::cout << "********** Energy\n" - << std::flush; - break; - case time: - std::cout << "&&&&&&&&&& Time\n" - << std::flush; - break; - case resource: - std::cout << "oooooooooo Resource\n" - << std::flush; - break; - } - std::cout << "[LOCALOPTIMIZER|DEBUG] Samples: " << std::flush; - for (auto &sam : obj.samples) - { - std::cout << sam << "," << std::flush; - } - std::cout << "\n" - << std::flush; - - std::cout << "[LOCALOPTIMIZER|DEBUG] 
Freq Param of Samples: " << std::flush; -#ifdef ALLSCALE_HAVE_CPUFREQ - for (auto &sam : obj.freq_samples) - { - std::cout << sam << "," << std::flush; - } - std::cout << "\n" - << std::flush; -#endif -#endif - - optstepresult nmd_res = nmd.step(obj.samples[0]); + std::cout << "[LOCALOPTIMIZER|INFO] Optimizer No-OP: either at warm-up or optimizer has completed\n"; +#endif + // set some random parametrization to collect at least 3 different + // vertices to be used as input to the optimizer + + //VV: TODO Ensure that we don't pick the same 3 configurations + float bucket_dt = steps_ / (float)warmup_steps_; + float _min_threads = max_threads_ * bucket_dt; + + act.delta_threads = rand() % (int)ceil(bucket_dt) + roundf(_min_threads); + + float _min_freqs = frequencies_param_allowed_.size() * bucket_dt; + act.frequency_idx = rand() % (int)ceil(bucket_dt) + roundf(_min_freqs); + + if (steps_ > 1) + { + accumulate_objective_measurements(); + initialization_samples[steps_ - 2][0] = pending_time; + initialization_samples[steps_ - 2][1] = pending_energy; + initialization_samples[steps_ - 2][2] = pending_threads; + reset_accumulated_measurements(); + initialization_params[steps_ - 2][0] = getCurrentThreads(); + + initialization_params[steps_ - 2][1] = getCurrentFrequencyIdx(); + + } + goto validate_act; + } + + if (mo_initialized == false) + initialize_nmd(); + + accumulate_objective_measurements(); + const double latest_measurements[] = {pending_time, + pending_energy, + pending_threads}; + reset_accumulated_measurements(); + + if ( explore_knob_domain ){ + optstepresult nmd_res = nmd.step(latest_measurements); #ifdef DEBUG_MULTIOBJECTIVE_ - std::cout << "[LOCALOPTIMIZER|DEBUG] Calling NMD Optimizer Step, Param = \n"; - std::cout << "[LOCALOPTIMIZER|DEBUG] New Vertex to try: "; - std::cout << "Threads = " << nmd_res.threads; -#ifdef ALLSCALE_HAVE_CPUFREQ - std::cout << " Freq Idx = " << nmd_res.freq_idx << std::endl; -#endif - std::cout << "Converg Thresh = " << 
convergence_threshold_ << std::endl; -#endif - if (nmd_res.converged) - { - objectives_[current_objective_idx_].converged = true; - objectives_[current_objective_idx_].converged_minimum = nmd.getMinObjective(); - double *minimization_point = nmd.getMinVertices(); - objectives_[current_objective_idx_].minimization_params[0] = - minimization_point[0]; - objectives_[current_objective_idx_].minimization_params[1] = - minimization_point[1]; -#ifdef DEBUG_CONVERGENCE_ - std::cout << "[LOCALOPTIMIZER|INFO] NMD convergence\n"; - std::cout << "******************************************" << std::endl; - std::cout << "[LOCALOPTIMIZER|INFO] Minimal Objective Value = " << objectives_[current_objective_idx_].converged_minimum << "Threads = " << minimization_point[0] << "Freq_idx = " << minimization_point[1] << std::endl; - std::cout << "******************************************" << std::endl; -#endif - act.delta_threads = minimization_point[0]; -#ifdef ALLSCALE_HAVE_CPUFREQ - act.frequency_idx = minimization_point[1]; + std::cout << "[LOCALOPTIMIZER|DEBUG] New Vertex to try:"; + std::cout << " Threads = " << nmd_res.threads; + std::cout << " Freq Idx = " << nmd_res.freq_idx << std::endl; + std::cout << " Converge Thresh = " << convergence_threshold_ << std::endl; #endif - current_objective_idx_++; - if (current_objective_idx_ == objectives_.size()) - { - converged_ = true; -#ifdef DEBUG_CONVERGENCE_ - std::cout << "[LOCALOPTIMIZER|INFO] ALL OBJECTIVES HAVE CONVERGED " << std::endl; -#endif - } - } - else - { -#if 0 - // if a higher priority objective starts getting off leeway margin, - // decide convergence of the current param at this parameter point - if (current_objective_idx_ > 0) - for (int i = 0; i < current_objective_idx_; i++) - { - objective priority_obj = objectives_[i]; - double max_leeway_value = priority_obj.converged_minimum + - priority_obj.leeway * (priority_obj.globalmax - priority_obj.converged_minimum); - if (priority_obj.samples[0] > max_leeway_value && - 
priority_obj.samples[1] > max_leeway_value) - { - objectives_[current_objective_idx_].converged = true; - objectives_[current_objective_idx_].converged_minimum = nmd.getMinObjective(); - double *minimization_point = nmd.getMinVertices(); - objectives_[current_objective_idx_].minimization_params[0] = - minimization_point[0]; - objectives_[current_objective_idx_].minimization_params[1] = - minimization_point[1]; + if (nmd_res.converged) + { + double min_score = nmd.getMinObjective(); + double *minimization_point = nmd.getMinVertices(); #ifdef DEBUG_CONVERGENCE_ - std::cout << "[LOCALOPTIMIZER|INFO] Leeway convergence\n"; - std::cout << "******************************************" << std::endl; - std::cout << "[LOCALOPTIMIZER|INFO] Minimal Objective Value = " << objectives_[current_objective_idx_].converged_minimum << "Threads = " << minimization_point[0] << "Freq_idx = " << minimization_point[1] << std::endl; - std::cout << "******************************************" << std::endl; -#endif - // find the parameter point that scores the leeway margin value - act.delta_threads = (int)priority_obj.minimization_params[0] * - (max_leeway_value / priority_obj.converged_minimum); -#ifdef ALLSCALE_HAVE_CPUFREQ - act.frequency_idx = (int)priority_obj.minimization_params[1] * - (max_leeway_value / priority_obj.converged_minimum); -#endif - current_objective_idx_++; - if (current_objective_idx_ == objectives_.size()) - { - converged_ = true; -#ifdef DEBUG_CONVERGENCE_ - std::cout << "[LOCALOPTIMIZER|INFO] ALL OBJECTIVES HAVE CONVERGED " << std::endl; -#endif - } - act.delta_threads = (nmd_res.threads == 0) ? 
getCurrentThreads() : nmd_res.threads; -#ifdef ALLSCALE_HAVE_CPUFREQ - act.frequency_idx = nmd_res.freq_idx; -#endif + std::cout << "[LOCALOPTIMIZER|INFO] NMD convergence\n"; + std::cout << "******************************************" << std::endl; + std::cout << "[LOCALOPTIMIZER|INFO] Minimal Objective Value = " << min_score << " Threads = " << minimization_point[0] << " Freq_idx = " << minimization_point[1] << std::endl; + std::cout << "******************************************" << std::endl; +#endif + act.delta_threads = minimization_point[0]; + act.frequency_idx = minimization_point[1]; + // VV: Stop searching for new knob_set + explore_knob_domain = false; + } else { + // VV: Have not converged yet, keep exploring + act.delta_threads = nmd_res.threads; + act.frequency_idx = nmd_res.freq_idx; + } + } + } +#endif // ALLSCALE_HAVE_CPUFREQ - goto validate_act; - } - } -#else - act.delta_threads = nmd_res.threads; - act.frequency_idx = nmd_res.freq_idx; -#endif - } - } validate_act: - if (act.delta_threads > max_threads_) - { - act.delta_threads = max_threads_; - } - else if (act.delta_threads < 1) - { - act.delta_threads = getCurrentThreads(); - } + if (act.delta_threads > max_threads_) + { + act.delta_threads = max_threads_; + } + else if (act.delta_threads < 1) + { + act.delta_threads = getCurrentThreads(); + } #ifdef ALLSCALE_HAVE_CPUFREQ - // VV: If freq_idx is -1 then set it to last used frequency (frequency_param_) - if (act.frequency_idx < 0) - act.frequency_idx = frequency_param_; - else if (act.frequency_idx > frequencies_param_allowed_.size() - 1) - { - act.frequency_idx = frequencies_param_allowed_.size() - 1; - } + // VV: If freq_idx is -1 then set it to last used frequency (frequency_param_) + if (act.frequency_idx < 0) + act.frequency_idx = frequency_param_; + else if (act.frequency_idx > frequencies_param_allowed_.size() - 1) + act.frequency_idx = frequencies_param_allowed_.size() - 1; #endif - return act; + return act; } } // namespace components 
} // namespace allscale diff --git a/src/components/nmsimplex_bbincr.cpp b/src/components/nmsimplex_bbincr.cpp index b96664a..414382c 100644 --- a/src/components/nmsimplex_bbincr.cpp +++ b/src/components/nmsimplex_bbincr.cpp @@ -14,12 +14,13 @@ #define NMD_DEBUG_ 1 #define NMD_INFO_ 1 -/* create the initial simplex - - vector(std::chrono::system_clock::now()).time_since_epoch().count(); + double abs_diff = 0; + for (auto j = 0; j < NMD_NUM_OBJECTIVES; ++j) + { + abs_diff += past->second.objectives[j] - objectives[j]; + past->second.objectives[j] = objectives[j]; + } + + past->second._cache_timestamp = timestamp_now; + // VV: Entries which remain relatively same should be explored less frequently + if (abs_diff > 0.1) + past->second._cache_expires_dt = CACHE_EXPIRE_AFTER_MS; + else if (past->second._cache_expires_dt < CACHE_EXPIRE_AFTER_MS * 1024) + past->second._cache_expires_dt *= 2; + + return true; + } + else if (add_if_new) + { + auto timestamp_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + optstepresult entry; + entry._cache_timestamp = timestamp_now; + entry._cache_expires_dt = CACHE_EXPIRE_AFTER_MS; + entry.threads = threads; + entry.freq_idx = freq_idx; + + for (auto j = 0; j < NMD_NUM_OBJECTIVES; ++j) + entry.objectives[j] = objectives[j]; + + cache_.insert(std::make_pair(key, entry)); + + return true; + } + + return false; +} + +double NelderMead::evaluate_score(const double objectives[], const double *weights) const +{ + double score = 0.0f; + // VV: [time, energy/power, resources] + double scale[] = {1.0, 1000.0, 1.0}; + scale[2] = (double)constraint_max[0]; + + if (weights == nullptr) + weights = opt_weights; + + for (auto i = 0; i < NMD_NUM_OBJECTIVES; ++i) + { + double t = objectives[i] / scale[i]; + score += t * t * weights[i]; + } + + return score; +} + +void NelderMead::set_weights(double weights[3]) +{ + opt_weights[0] = weights[0]; + opt_weights[1] = weights[1]; + opt_weights[2] = weights[2]; + 
OUT_DEBUG( + std::cout << "[NelderMead|DEBUG] Weights: " + << opt_weights[0] << " " + << opt_weights[1] << " " + << opt_weights[2] << std::endl; + ) +} + /* FIXME: generalize */ -void NelderMead::initialize_simplex(double params[][2], double values[], double constraint_min[], double constraint_max[]) +void NelderMead::initialize_simplex(double params[][2], + double objectives[][3], + double weights[3], + double constraint_min[2], + double constraint_max[2]) { int i, j; + long timestamp_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); - for (i = 0; i <= n; i++) + for (i = 0; i < NMD_NUM_KNOBS; i++) + { + this->constraint_min[i] = constraint_min[i]; + this->constraint_max[i] = constraint_max[i]; + } + + set_weights(weights); + + // VV: Need num_knobs +1 + for (i = 0; i < NMD_NUM_KNOBS + 1; i++) { + f[i] = evaluate_score(objectives[i], weights); + for (j = 0; j < n; j++) { v[i][j] = params[i][j]; } - f[i] = values[i]; - this->constraint_min[i] = constraint_min[i]; - this->constraint_max[i] = constraint_max[i]; + + my_constraints(v[i]); + + optstepresult entry; + entry.threads = round(v[i][0]); + entry.freq_idx = round(v[i][1]); + + // VV: Check if we can re-use a previously explored configuration + auto key = std::make_pair(entry.threads, entry.freq_idx); + + auto past_entry = cache_.find(std::make_pair(entry.threads, + entry.freq_idx)); + if (past_entry != cache_.end()) + { + for (j = 0; j < NMD_NUM_OBJECTIVES; ++j) + past_entry->second.objectives[j] = objectives[i][j]; + + past_entry->second._cache_timestamp = timestamp_now; + // VV: Skip attempting to re-insert the "same" entry + continue; + } + + // VV: If we've reached this point we need to add the entry to the cache + for (j = 0; j < NMD_NUM_OBJECTIVES; ++j) + entry.objectives[j] = objectives[i][j]; + + entry._cache_timestamp = timestamp_now; + entry._cache_expires_dt = CACHE_EXPIRE_AFTER_MS; + + cache_.insert(std::make_pair(key, entry)); } itr = 0; @@ -103,13 
+224,30 @@ void NelderMead::print_initial_simplex() { int i, j; std::cout << "[NelderMead DEBUG] Initial Values\n"; - for (j = 0; j <= n; j++) + + for (j = 0; j < NMD_NUM_KNOBS + 1; j++) { - for (i = 0; i < n; i++) + + for (i = 0; i < NMD_NUM_KNOBS; i++) { std::cout << v[j][i] << ","; } - std::cout << " Objective value = " << f[j] << std::endl; + const int threads = (int) v[j][0]; + const int freq_idx = (int) v[j][1]; + + auto e = cache_.find(std::make_pair(threads, freq_idx)); + std::cout << " Objective value = " << f[j]; + + if ( e == cache_.end() ) + { + std::cout << " (not in cache)" << std::endl; + } else { + std::cout << " OBJs: " + << e->second.objectives[0] << " " + << e->second.objectives[1] << " " + << e->second.objectives[2] << " " + << std::endl; + } } } @@ -138,53 +276,6 @@ void NelderMead::print_iteration() std::cout << "[NelderMead DEBUG] f[vg]= " << f[vg] << ", vg = " << vg << std::endl; } -/* find the index of the largest value */ -int NelderMead::vg_index() -{ - int j; - int vg = 0; - - for (j = 0; j <= n; j++) - { - if (f[j] > f[vg]) - { - vg = j; - } - } - return vg; -} - -/* find the index of the smallest value */ -int NelderMead::vs_index() -{ - int j; - int vs = 0; - - for (j = 0; j <= n; j++) - { - if (f[j] < f[vs]) - { - vs = j; - } - } - return vs; -} - -/* find the index of the second largest value */ -int NelderMead::vh_index() -{ - int j; - - for (j = 0; j <= n; j++) - { - if (f[j] > f[vh] && f[j] < f[vg]) - { - vh = j; - } - } - return vh; -} - /* calculate the centroid */ void NelderMead::centroid() { @@ -222,28 +313,82 @@ void NelderMead::sort_vertices() // VV: Find out what's the half-point by using a bitmap, // when vg==vs that means that all points are equal - vh = 1 + 2 + 4 - (1 << vg) - (1 << vs); - vh = map_to_index[vh]; + if (vg != vs) + { + vh = 1 + 2 + 4 - (1 << vg) - (1 << vs); + vh = map_to_index[vh]; + } + else + { + vg = 2; + vh = 1; + vs = 0; + } } -optstepresult NelderMead::do_step_start(double param) +bool 
NelderMead::knob_set_exists(double knobs[2], int exclude) +{ + int is_same; + + for (auto i=0; i NMD_NUM_KNOBS +1 ) { + is_invalid = knob_set_exists(vr, -1); + + if ( is_invalid ) { + extra[0] = rand() % (int)(constraint_max[0] - constraint_min[0]) + + (int) constraint_min[0] + - (int)(0.5*(constraint_max[0] - constraint_min[0])); + + extra[1] = rand() % (int)(constraint_max[1] - constraint_min[1]) + + (int) constraint_min[1] + - (int)(0.5*(constraint_max[1] - constraint_min[1])); + + } + } + + } while ( is_invalid ); - for (j = 0; j <= n - 1; j++) - { - vr[j] = vm[j] + ALPHA * (vm[j] - v[vg][j]); - } - my_constraints(vr); #ifdef NMD_DEBUG_ std::cout << "[NelderMead DEBUG] Reflection Parameter = (" << vr[0] << "," << vr[1] << ")" @@ -254,21 +399,34 @@ optstepresult NelderMead::do_step_start(double param) res.threads = vr[0]; res.freq_idx = vr[1]; + auto key = std::make_pair(res.threads, res.freq_idx); + + auto entry = cache_.find(key); + + if (entry != cache_.end()) + { + auto timestamp_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + auto dt = timestamp_now - entry->second._cache_timestamp; + + if (dt < entry->second._cache_expires_dt) + { + return do_step_reflect(entry->second.objectives); + } + } + return res; } -optstepresult NelderMead::do_step_reflect(double param) +optstepresult NelderMead::do_step_reflect(const double objectives[]) { optstepresult res; #ifdef NMD_DEBUG_ std::cout << "[NelderMead DEBUG] State = Reflection" << std::endl; #endif - fr = param; + fr = evaluate_score(objectives, opt_weights); - std::cout << "fr:" << fr << " f[vh]:" << f[vh] - << " f[vs]:" << f[vs] << std::endl; - - if ( (f[vs] <= fr) && (fr < f[vh]) ) { + if ((f[vs] <= fr) && (fr < f[vh])) + { // VV: REFLECTED point is better than the SECOND BEST // but NOT better than the BEST // Replace WORST point with REFLECTED @@ -276,30 +434,112 @@ optstepresult NelderMead::do_step_reflect(double param) { v[vg][j] = vr[j]; } + + 
my_constraints(v[vg]); + f[vg] = fr; + + const int threads = (int)(v[vg][0]); + const int freq_idx = (int)(v[vg][1]); + + cache_update(threads, freq_idx, objectives, true); + state_ = start; - return do_step_start(param); - } else if ( fr < f[vs] ) { + return do_step_start(); + } + else if (fr < f[vs]) + { // VV: REFLECTED is better than BEST - - for ( j=0; j<=n-1; ++j) - ve[j] = vm[j] + GAMMA * (vr[j] - vm[j]); - - my_constraints(ve); + + double extra[2] = {0.0, 0.0}; + int is_invalid = 0; + int max_combinations = 0; + + max_combinations = (constraint_max[0] - constraint_min[0]+1) * (constraint_max[1] - constraint_min[1]+1); + + // VV: Try not to pick a knob_set that already exists in `v` + do { + for (j = 0; j < NMD_NUM_KNOBS; j++) + ve[j] = vm[j] + GAMMA * (vr[j] - vm[j]) + extra[j]; + + my_constraints(ve); + + is_invalid = 0; + + if ( max_combinations > NMD_NUM_KNOBS +1 ) { + is_invalid = knob_set_exists(ve, -1); + + if ( is_invalid ) { + extra[0] = rand() % (int)(constraint_max[0] - constraint_min[0]) + + (int) constraint_min[0] + - (int)(0.5*(constraint_max[0] - constraint_min[0])); + + extra[1] = rand() % (int)(constraint_max[1] - constraint_min[1]) + + (int) constraint_min[1] + - (int)(0.5*(constraint_max[1] - constraint_min[1])); + + } + } + + } while ( is_invalid ); + // VV: Now evaluate EXPANDED res.threads = ve[0]; res.freq_idx = ve[1]; state_ = expansion; + auto key = std::make_pair(res.threads, res.freq_idx); + + auto entry = cache_.find(key); + + if (entry != cache_.end()) + { + auto timestamp_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + auto dt = timestamp_now - entry->second._cache_timestamp; + + if (dt < entry->second._cache_expires_dt) + { + return do_step_expand(entry->second.objectives); + } + } + return res; - } else if ( (f[vh] <= fr) && (fr < f[vg])) { + } + else if ((f[vh] <= fr) && (fr < f[vg])) + { // VV: REFLECTED between SECOND BEST and WORST - - for ( j=0; j<=n-1; ++j) - vc[j] = 
vm[j] + BETA * (vr[j] - vm[j]); - - my_constraints(vc); + double extra[2] = {0.0, 0.0}; + int is_invalid = 0; + int max_combinations = 0; + + max_combinations = (constraint_max[0] - constraint_min[0]+1) * (constraint_max[1] - constraint_min[1]+1); + + // VV: Try not to pick a knob_set that already exists in `v` + do { + for (j = 0; j < NMD_NUM_KNOBS; j++) + vc[j] = vm[j] + BETA * (vr[j] - vm[j]) + extra[j]; + + my_constraints(vc); + + is_invalid = 0; + + if ( max_combinations > NMD_NUM_KNOBS +1 ) { + is_invalid = knob_set_exists(vc, -1); + + if ( is_invalid ) { + extra[0] = rand() % (int)(constraint_max[0] - constraint_min[0]) + + (int) constraint_min[0] + - (int)(0.5*(constraint_max[0] - constraint_min[0])); + + extra[1] = rand() % (int)(constraint_max[1] - constraint_min[1]) + + (int) constraint_min[1] + - (int)(0.5*(constraint_max[1] - constraint_min[1])); + + } + } + + } while ( is_invalid ); // VV: Now evaluate EXPANDED res.threads = vc[0]; @@ -307,30 +547,88 @@ optstepresult NelderMead::do_step_reflect(double param) state_ = contraction; + auto key = std::make_pair(res.threads, res.freq_idx); + + auto entry = cache_.find(key); + + if (entry != cache_.end()) + { + auto timestamp_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + auto dt = timestamp_now - entry->second._cache_timestamp; + + if (dt < entry->second._cache_expires_dt) + { + return do_step_contract(entry->second.objectives); + } + } + return res; - } else { + } + else + { // VV: REFLECTED worse than WORST - for ( j=0; j<=n-1; ++j) - vc[j] = vm[j] - BETA * (vr[j] - vm[j]); - - my_constraints(vc); + double extra[2] = {0.0, 0.0}; + int is_invalid = 0; + int max_combinations = 0; + + max_combinations = (constraint_max[0] - constraint_min[0]+1) * (constraint_max[1] - constraint_min[1]+1); + + // VV: Try not to pick a knob_set that already exists in `v` + do { + for (j = 0; j < NMD_NUM_KNOBS; j++) + vc[j] = vm[j] - BETA * (vr[j] - vm[j]) + extra[j]; + + 
my_constraints(vc); + + is_invalid = 0; + + if ( max_combinations > NMD_NUM_KNOBS +1 ) { + is_invalid = knob_set_exists(vc, -1); + + if ( is_invalid ) { + extra[0] = rand() % (int)(constraint_max[0] - constraint_min[0]) + + (int) constraint_min[0] + - (int)(0.5*(constraint_max[0] - constraint_min[0])); + + extra[1] = rand() % (int)(constraint_max[1] - constraint_min[1]) + + (int) constraint_min[1] + - (int)(0.5*(constraint_max[1] - constraint_min[1])); + + } + } + + } while ( is_invalid ); // VV: Now evaluate EXPANDED res.threads = vc[0]; res.freq_idx = vc[1]; state_ = contraction; + auto key = std::make_pair(res.threads, res.freq_idx); + + auto entry = cache_.find(key); + + if (entry != cache_.end()) + { + auto timestamp_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + auto dt = timestamp_now - entry->second._cache_timestamp; + + if (dt < entry->second._cache_expires_dt) + { + return do_step_contract(entry->second.objectives); + } + } return res; } } -optstepresult NelderMead::do_step_expand(double param) +optstepresult NelderMead::do_step_expand(const double objectives[]) { #ifdef NMD_DEBUG_ std::cout << "[NelderMead DEBUG] State = Expansion" << std::endl; #endif - fe = param; + fe = evaluate_score(objectives, nullptr); if (fe < fr) { @@ -353,19 +651,23 @@ optstepresult NelderMead::do_step_expand(double param) } state_ = start; - - return do_step_start(param); + const int threads = (int)(v[vg][0]); + const int freq_idx = (int)(v[vg][1]); + + cache_update(threads, freq_idx, objectives, true); + return do_step_start(); } -optstepresult NelderMead::do_step_contract(double param) +optstepresult NelderMead::do_step_contract(const double objectives[]) { int j; #ifdef NMD_DEBUG_ std::cout << "[NelderMead|DEBUG] State = Contraction" << std::endl; #endif - fc = param; + fc = evaluate_score(objectives, nullptr); - if ( fc <= fr ) { + if (fc <= fr) + { // VV: CONTRACTED_O is better than REFLECTED // Replace WORST with 
CONTRACTED_O for (j = 0; j <= n - 1; j++) @@ -374,58 +676,122 @@ optstepresult NelderMead::do_step_contract(double param) } f[vg] = fc; - return do_step_start(param); - } else { + const int threads = (int)(v[vg][0]); + const int freq_idx = (int)(v[vg][1]); + + cache_update(threads, freq_idx, objectives, true); + return do_step_start(); + } + else + { // VV: Replace SECOND BEST - for (j = 0; j <= n - 1; j++) - v[vh][j] = v[vs][j] + DELTA * (v[vh][j] - v[vs][j]); - - my_constraints(v[vh]); + double new_vh[NMD_NUM_KNOBS]; + double extra[NMD_NUM_KNOBS] = {0.0, 0.0}; + int is_invalid = 0; + int max_combinations = 0; + + max_combinations = (constraint_max[0] - constraint_min[0]+1) * (constraint_max[1] - constraint_min[1]+1); + + // VV: Try not to pick a knob_set that already exists in `v` + do { + for (j = 0; j < NMD_NUM_KNOBS; j++) + new_vh[j] = v[vs][j] + DELTA * (v[vh][j] - v[vs][j]) + extra[j]; + + my_constraints(new_vh); + + is_invalid = 0; + + if ( max_combinations > NMD_NUM_KNOBS +1 ) { + is_invalid = knob_set_exists(new_vh, -1); + + if ( is_invalid ) { + extra[0] = rand() % (int)(constraint_max[0] - constraint_min[0]) + + (int) constraint_min[0] + - (int)(0.5*(constraint_max[0] - constraint_min[0])); + + extra[1] = rand() % (int)(constraint_max[1] - constraint_min[1]) + + (int) constraint_min[1] + - (int)(0.5*(constraint_max[1] - constraint_min[1])); + + } + } + + } while ( is_invalid ); + + for (j = 0; j < NMD_NUM_KNOBS; j++) + v[vh][j] = new_vh[j]; + // VV: Now evaluate SHRINK optstepresult res; res.threads = v[vh][0]; res.freq_idx = v[vh][1]; state_ = shrink; + + auto key = std::make_pair(res.threads, res.freq_idx); + + auto entry = cache_.find(key); + + if (entry != cache_.end()) + { + auto timestamp_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + auto dt = timestamp_now - entry->second._cache_timestamp; + + if (dt < entry->second._cache_expires_dt) + { + return do_step_shrink(entry->second.objectives); + } + 
} + return res; } } -optstepresult NelderMead::do_step_shrink(double param) +optstepresult NelderMead::do_step_shrink(const double objectives[]) { #ifdef NMD_DEBUG_ std::cout << "[NelderMead|DEBUG] State = Shrink" << std::endl; #endif - f[vh] = param; - return do_step_start(param); + f[vh] = evaluate_score(objectives, nullptr); + + const int threads = (int)(v[vh][0]); + const int freq_idx = (int)(v[vh][1]); + + cache_update(threads, freq_idx, objectives, true); + + return do_step_start(); } -optstepresult NelderMead::step(double param) +optstepresult NelderMead::step(const double objectives[]) { int i, j; optstepresult res; res.threads = 0; res.freq_idx = -1; - + std::cout << "Starting step with " + << objectives[0] << " " + << objectives[1] << " " + << objectives[2] << std::endl; + switch (state_) { case start: - res = do_step_start(param); - break; + res = do_step_start(); + break; case reflection: - res = do_step_reflect(param); - break; + res = do_step_reflect(objectives); + break; case expansion: - res = do_step_expand(param); - break; + res = do_step_expand(objectives); + break; case contraction: - res = do_step_contract(param); - break; + res = do_step_contract(objectives); + break; case shrink: - res = do_step_shrink(param); - break; + res = do_step_shrink(objectives); + break; default: std::cout << "Unknown NelderMead state (" << state_ << ")" << std::endl; res.converged = false; @@ -434,11 +800,16 @@ optstepresult NelderMead::step(double param) res.converged = testConvergence(); - if ( res.converged == true ) { + if (res.converged == true) + { res.threads = v[vs][0]; res.freq_idx = v[vs][1]; std::cout << "Converged to " << res.threads << " " << res.freq_idx << std::endl; } + std::cout << "Stop step with " + << objectives[0] << " " + << objectives[1] << " " + << objectives[2] << std::endl; return res; } @@ -446,15 +817,31 @@ optstepresult NelderMead::step(double param) bool NelderMead::testConvergence() { double temp; + #if 0 + int all_same = 1; + + for 
(auto i = 0; i <= n; ++i) + { + for (auto k = i + 1; j <= n; ++k) + for (auto j = 0; j < n; ++j) + all_same &= (v[i][j] == v[k][j]); + } + + if (all_same) + { + min = f[vs]; + return true; + } + #endif fsum = 0.0; - for (j = 0; j <= n; j++) + for (auto j = 0; j <= n; j++) { fsum += f[j]; } favg = fsum / (n + 1); s = 0.0; - for (j = 0; j <= n; j++) + for (auto j = 0; j <= n; j++) { temp = (f[j] - favg); s += temp * temp / (n); @@ -469,56 +856,11 @@ bool NelderMead::testConvergence() return false; else { - vs = vs_index(); + sort_vertices(); min = f[vs]; return true; } } -void NelderMead::updateObjectives() -{ - /* re-evaluate all the vertices */ - /*for (j=0;j<=n;j++) { - f[j] = objfunc(v[j]); - } - */ - - /* find the index of the largest value */ - vg = vg_index(); - - /* find the index of the smallest value */ - vs = vs_index(); - - /* find the index of the second largest value */ - vh = vh_index(); - - my_constraints(v[vg]); - - //f[vg] = objfunc(v[vg]); - - my_constraints(v[vh]); - - //f[vh] = objfunc(v[vh]); -} - } // namespace components } // namespace allscale -/* - - std::vector NelderMead::minimum(){ - - - free(f); - free(vr); - free(ve); - free(vc); - free(vm); - for (i=0;i<=n;i++) { - free (v[i]); - } - free(v); - return min; - - - } - */ diff --git a/src/components/scheduler_component.cpp b/src/components/scheduler_component.cpp index e2b7df9..3b4aaba 100644 --- a/src/components/scheduler_component.cpp +++ b/src/components/scheduler_component.cpp @@ -223,8 +223,6 @@ void scheduler::init() { ) ); -// std::cout << "init: " << num_cores << " " << allscale::get_num_localities() << " " << depth_cut_off_ << '\n'; - // Reading user provided options in terms of desired optimization objectives std::string input_objective_str = hpx::get_config_entry("allscale.objective", ""); @@ -232,18 +230,24 @@ void scheduler::init() { /* Read optimization policy selected by the user. 
If not specified, allscale policy is the default */ std::string input_optpolicy_str = - hpx::get_config_entry("allscale.policy", "allscale"); + hpx::get_config_entry("allscale.policy", "none"); #ifdef DEBUG_MULTIOBJECTIVE_ std::cout << "[Local Optimizer|INFO] Optimization Policy Active = " << input_optpolicy_str << std::endl; #endif - if (input_optpolicy_str=="allscale") - lopt_.setPolicy(allscale); - else if (input_optpolicy_str=="random") +#if ALLSCALE_HAVE_CPUFREQ + if (input_optpolicy_str=="allscale") { + lopt_.setPolicy(allscale); + } + else +#endif + if (input_optpolicy_str=="random") lopt_.setPolicy(random); else if (input_optpolicy_str=="manual") lopt_.setPolicy(manual); - else lopt_.setPolicy(allscale); - + else if ( input_optpolicy_str != "none" ) { + HPX_THROW_EXCEPTION(hpx::bad_request, "scheduler::init", + "unknown allscale.policy"); + } #ifdef MEASURE_MANUAL_ std::string input_osthreads_str = hpx::get_config_entry("allscale.osthreads", ""); From 67ee0c16d1af5bd01fd27b94deb5fbbb76276c59 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Wed, 14 Nov 2018 14:21:41 +0000 Subject: [PATCH 06/37] Moved warmup stage in NMD algorithm --- allscale/components/localoptimizer.hpp | 17 +- allscale/components/nmsimplex_bbincr.hpp | 245 +++++------ allscale/components/scheduler.hpp | 4 +- src/components/localoptimizer.cpp | 93 ++--- src/components/nmsimplex_bbincr.cpp | 495 +++++++++++++---------- src/components/scheduler_component.cpp | 2 +- 6 files changed, 456 insertions(+), 400 deletions(-) diff --git a/allscale/components/localoptimizer.hpp b/allscale/components/localoptimizer.hpp index d9799cc..4b2d1ce 100644 --- a/allscale/components/localoptimizer.hpp +++ b/allscale/components/localoptimizer.hpp @@ -83,7 +83,7 @@ namespace allscale { namespace components { struct localoptimizer { localoptimizer() - :nmd(0.01), + :nmd(convergence_threshold_), pending_threads(0.), pending_energy(0.), pending_time(0.), @@ -142,10 +142,7 @@ namespace allscale { namespace 
components { return max_threads_; } - void setmaxthreads(std::size_t threads){ - max_threads_=threads; - threads_param_=threads; - } + void setmaxthreads(std::size_t threads); /* executes one step of multi-objective optimization */ actuation step(); @@ -178,6 +175,9 @@ namespace allscale { namespace components { } private: + // VV: Used to convert thread_idx to actual number of threads + std::size_t threads_dt; + void accumulate_objective_measurements(); void reset_accumulated_measurements(); @@ -188,9 +188,6 @@ namespace allscale { namespace components { bool explore_knob_domain; - double initialization_samples[NMD_NUM_KNOBS+1][NMD_NUM_OBJECTIVES]; - double initialization_params[NMD_NUM_KNOBS+1][NMD_NUM_KNOBS]; - double pending_time, pending_energy, pending_threads; unsigned long pending_num_times; @@ -234,8 +231,8 @@ namespace allscale { namespace components { #endif /* threshold (percentage in [0,1]) to decide convergence of optimization - steps against a single objective */ - const double convergence_threshold_ = 0.02; + steps */ + const double convergence_threshold_ = 0.01; /***** optimization state variables ******/ diff --git a/allscale/components/nmsimplex_bbincr.hpp b/allscale/components/nmsimplex_bbincr.hpp index e87fe8c..2674517 100644 --- a/allscale/components/nmsimplex_bbincr.hpp +++ b/allscale/components/nmsimplex_bbincr.hpp @@ -38,29 +38,38 @@ namespace components // VV: time, energy/power, resources #define NMD_NUM_OBJECTIVES 3 + +#if (NMD_NUM_OBJECTIVES != 3) +#error UNSUPPORTED number of Objectives +#endif + +#if (NMD_NUM_KNOBS != 2) +#error UNSUPPORTED number of Knobs +#endif + #define MAX_IT 1000 /* maximum number of iterations */ #define ALPHA 1.0 /* reflection coefficient */ -#define BETA 0.5 /* contraction coefficient */ +#define BETA 0.5 /* contraction coefficient */ #define GAMMA 2.0 /* expansion coefficient */ #define DELTA 0.5 /* shrinking coefficient */ -#define CACHE_EXPIRE_AFTER_MS 5000 +#define CACHE_EXPIRE_AFTER_MS 35000 /* structure 
type of a single optimization step return status */ struct optstepresult { - /* true if optimization has converged for the specified objective */ - bool converged; - /* number of threads for parameters to set for sampling */ - double threads; - /* index to frequency vector for freq parameter to set for sampling*/ - int freq_idx; - - /******VV: Cache stuff******/ - double score; - double objectives[3]; // (time, energy, resource) - // VV: _cache_expires denotes dt (in ms) after _cache_timestamp - int64_t _cache_timestamp, _cache_expires_dt; + /* true if optimization has converged for the specified objective */ + bool converged; + /* number of threads for parameters to set for sampling */ + double threads; + /* index to frequency vector for freq parameter to set for sampling*/ + int freq_idx; + + /******VV: Cache stuff******/ + double score; + double objectives[3]; // (time, energy, resource) + // VV: _cache_expires denotes dt (in ms) after _cache_timestamp + int64_t _cache_timestamp, _cache_expires_dt; }; typedef std::map, optstepresult> MapCache_t; @@ -68,143 +77,153 @@ typedef std::map, optstepresult> MapCache_t; /* enumeration encoding state that the incremental Nelder Mead optimizer is at */ enum iterationstates { - start, - reflection, - expansion, - contraction, - shrink + // VV: Need NMD_NUM_KNOBS + 1 values before we can start optimizing + warmup, + start, + reflection, + expansion, + contraction, + shrink }; + class NelderMead { -public: - NelderMead(double); - // VV: For the time being params = [threads, freq_idx] - // objectives = [time, energy/power, resources] - // weights = [ W_time, W_energy/power, W_resources ] - // constraint_min = [min_threads, min_freq_idx] - void initialize_simplex(double params[NMD_NUM_KNOBS+1][NMD_NUM_KNOBS], - double objectives[][NMD_NUM_OBJECTIVES], - double weights[NMD_NUM_OBJECTIVES], - double constraint_min[NMD_NUM_KNOBS], - double constraint_max[NMD_NUM_KNOBS]); - void print_initial_simplex(); - void print_iteration(); - 
- double *getMinVertices() - { - return v[vs]; - } + public: + NelderMead(double); + // VV: For the time being + // weights = [ W_time, W_energy/power, W_resources ] + // constraint_min = [min_threads, min_freq_idx] + void initialize_simplex(double weights[NMD_NUM_OBJECTIVES], + double constraint_min[NMD_NUM_KNOBS], + double constraint_max[NMD_NUM_KNOBS]); + + void print_initial_simplex(); + void print_iteration(); + + double *getMinVertices() + { + return v[vs]; + } + + double getMinObjective() + { + return min; + } + + unsigned long int getIterations() { return itr; } + double evaluate_score(const double objectives[], const double *weights) const; + void set_weights(double weights[]); + + optstepresult step(const double objectives[]); + + private: + int warming_up_step; - double getMinObjective() - { - return min; - } + // VV: Utility to make sure that we generate new values and not something that already + // exists in the set of NMD_NUM_KNOBS+1 configuration points + template + void generate_new(F &gen); + enum direction {up, up_final, down, left, right, right_final}; + std::pair explore_next_extra(double *extra, int level, + direction dir, int level_max, int level_nested_max); - unsigned long int getIterations() { return itr; } - double evaluate_score(const double objectives[], const double *weights) const; - void set_weights(double weights[]); + //VV: objective_type: { : optstepresult } + MapCache_t cache_; - optstepresult step(const double objectives[]); -private: - //VV: objective_type: { : optstepresult } - MapCache_t cache_; - - optstepresult do_step_start(); - optstepresult do_step_reflect(const double objectives[]); - optstepresult do_step_expand(const double objectives[]); - optstepresult do_step_contract(const double objectives[]); - optstepresult do_step_shrink(const double objectives[]); + optstepresult do_step_start(); + optstepresult do_step_reflect(const double objectives[]); + optstepresult do_step_expand(const double objectives[]); + 
optstepresult do_step_contract(const double objectives[]); + optstepresult do_step_shrink(const double objectives[]); - bool knob_set_exists(double knobs[2], int exclude); + void sort_vertices(void); + void my_constraints(double *); + void centroid(); + bool testConvergence(std::size_t tested_combinations); - void sort_vertices(void); - void my_constraints(double *); - void centroid(); - bool testConvergence(); + // VV: Will return false if entry not in cache + bool cache_update(int threads, int freq_idx, + const double objectives[], + bool add_if_new); - // VV: Will return false if entry not in cache - bool cache_update(int threads, int freq_idx, - const double objectives[], - bool add_if_new); + double round2(double num, int precision) + { + double rnum = 0.0; + int tnum; - double round2(double num, int precision) - { - double rnum = 0.0; - int tnum; + if (num == 0.0) + return num; - if (num == 0.0) - return num; + rnum = num * pow(10, precision); + tnum = (int)(rnum < 0 ? rnum - 0.5 : rnum + 0.5); + rnum = tnum / pow(10, precision); - rnum = num * pow(10, precision); - tnum = (int)(rnum < 0 ? 
rnum - 0.5 : rnum + 0.5); - rnum = tnum / pow(10, precision); + return rnum; + } - return rnum; - } + /* vertex with smallest value */ + int vs; - /* vertex with smallest value */ - int vs; + /* vertex with next smallest value */ + int vh; - /* vertex with next smallest value */ - int vh; + /* vertex with largest value */ + int vg; - /* vertex with largest value */ - int vg; + int i, j, row; - int i, j, row; - - const int n = 2; + const int n = 2; - /* track the number of function evaluations */ - int k; + /* track the number of function evaluations */ + int k; - /* track the number of iterations */ - int itr; + /* track the number of iterations */ + int itr; - /* holds vertices of simplex */ - double **v; + /* holds vertices of simplex */ + double **v; - /* value of function at each vertex */ - double *f; + /* value of function at each vertex */ + double *f; - /* value of function at reflection point */ - double fr; + /* value of function at reflection point */ + double fr; - /* value of function at expansion point */ - double fe; + /* value of function at expansion point */ + double fe; - /* value of function at contraction point */ - double fc; + /* value of function at contraction point */ + double fc; - /* reflection - coordinates */ - double *vr; + /* reflection - coordinates */ + double *vr; - /* expansion - coordinates */ - double *ve; + /* expansion - coordinates */ + double *ve; - /* contraction - coordinates */ - double *vc; + /* contraction - coordinates */ + double *vc; - /* centroid - coordinates */ - double *vm; + /* centroid - coordinates */ + double *vm; - double min; + double min; - double fsum, favg, s; + double fsum, favg, s; - double EPSILON; + double EPSILON; - iterationstates state_; + iterationstates state_; - const int MAXITERATIONS = 15; + const int MAXITERATIONS = 15; - double constraint_min[2]; + double constraint_min[2]; - double constraint_max[2]; + double constraint_max[2]; - double opt_weights[NMD_NUM_OBJECTIVES]; + double 
opt_weights[NMD_NUM_OBJECTIVES]; }; } // namespace components diff --git a/allscale/components/scheduler.hpp b/allscale/components/scheduler.hpp index c508900..0980207 100644 --- a/allscale/components/scheduler.hpp +++ b/allscale/components/scheduler.hpp @@ -110,7 +110,7 @@ namespace allscale { namespace components { long last_optimization_timestamp_; /* periodicity in milliseconds to invoke the optimizer */ - const long optimization_period_ms = 5000; + const long optimization_period_ms = 1000; /* captures absolute timestamp of the last time optimization objective value have been measured (sampled) */ @@ -119,7 +119,7 @@ namespace allscale { namespace components { long last_objective_measurement_timestamp_; /* periodicity in milliseconds to invoke objective sampling */ - const long objective_measurement_period_ms = 1000; + const long objective_measurement_period_ms = 500; //extra masks to better handle suspending/resuming threads std::vector thread_pools_; diff --git a/src/components/localoptimizer.cpp b/src/components/localoptimizer.cpp index 04ef472..14158fa 100644 --- a/src/components/localoptimizer.cpp +++ b/src/components/localoptimizer.cpp @@ -24,6 +24,7 @@ namespace allscale { namespace components { + #if 0 localoptimizer::localoptimizer(std::list targetobjectives) : objectives_((int)targetobjectives.size()), nmd(convergence_threshold_), @@ -49,6 +50,7 @@ localoptimizer::localoptimizer(std::list targetobjectives) setCurrentFrequencyIdx(0); #endif }; +#endif void localoptimizer::setobjectives(std::list targetobjectives) { @@ -153,42 +155,47 @@ void localoptimizer::accumulate_objective_measurements() if (pending_num_times) { pending_time /= (double)pending_num_times; - pending_threads /= (double)pending_num_times; + pending_threads /= (double)(pending_num_times*threads_dt); pending_energy /= (double)pending_num_times; pending_num_times = 0; } } -#ifdef ALLSCALE_HAVE_CPUFREQ -void localoptimizer::initialize_nmd() +void localoptimizer::setmaxthreads(std::size_t 
threads) { - // VV: Retrieve measurements for last exploration - if ( steps_ == warmup_steps_ +1 ) - { - accumulate_objective_measurements(); - - initialization_samples[steps_ - 2][0] = pending_time; - initialization_samples[steps_ - 2][1] = pending_energy; - initialization_samples[steps_ - 2][2] = pending_threads; - - reset_accumulated_measurements(); + max_threads_=threads; + threads_param_=threads; + #if 0 + double threads_tick = threads / 5.; - initialization_params[steps_ - 2][1] = getCurrentFrequencyIdx(); - } + if ( threads_tick < 1.0 ) + threads_tick = 1.0; - // VV: Place reasonable limits to #threads and cpu_freq tunable knobs - double min_threads = round(max_threads_ * 0.25); + threads_dt = (int) round(threads_tick); + #elif 0 + if ( max_threads_ <= 4 ) + threads_dt = 1.; + else if ( max_threads_ <= 8 ) + threads_dt = 2.; + else if ( max_threads_ <= 32 ) + threads_dt = 4.; + else + threads_dt = 8.; + #else + threads_dt = 1.; + #endif +} - if (min_threads < 1.0) - min_threads = 1.0; +#ifdef ALLSCALE_HAVE_CPUFREQ +void localoptimizer::initialize_nmd() +{ + // VV: Place reasonable limits to #threads and cpu_freq tunable knobs - double constraint_min[] = {min_threads, 0}; - double constraint_max[] = {(double)max_threads_, + double constraint_min[] = {1, 0}; + double constraint_max[] = {ceil(max_threads_/(double)threads_dt), (double)frequencies_param_allowed_.size() - 1}; - nmd.initialize_simplex(initialization_params, - initialization_samples, - opt_weights, + nmd.initialize_simplex(opt_weights, constraint_min, constraint_max); mo_initialized = true; @@ -242,38 +249,6 @@ actuation localoptimizer::step() #ifdef ALLSCALE_HAVE_CPUFREQ else if (optmethod_ == allscale) { - if (steps_ <= warmup_steps_) - { -#ifdef DEBUG_MULTIOBJECTIVE_ - std::cout << "[LOCALOPTIMIZER|INFO] Optimizer No-OP: either at warm-up or optimizer has completed\n"; -#endif - // set some random parametrization to collect at least 3 different - // vertices to be used as input to the optimizer - 
- //VV: TODO Ensure that we don't pick the same 3 configurations - float bucket_dt = steps_ / (float)warmup_steps_; - float _min_threads = max_threads_ * bucket_dt; - - act.delta_threads = rand() % (int)ceil(bucket_dt) + roundf(_min_threads); - - float _min_freqs = frequencies_param_allowed_.size() * bucket_dt; - act.frequency_idx = rand() % (int)ceil(bucket_dt) + roundf(_min_freqs); - - if (steps_ > 1) - { - accumulate_objective_measurements(); - initialization_samples[steps_ - 2][0] = pending_time; - initialization_samples[steps_ - 2][1] = pending_energy; - initialization_samples[steps_ - 2][2] = pending_threads; - reset_accumulated_measurements(); - initialization_params[steps_ - 2][0] = getCurrentThreads(); - - initialization_params[steps_ - 2][1] = getCurrentFrequencyIdx(); - - } - goto validate_act; - } - if (mo_initialized == false) initialize_nmd(); @@ -285,6 +260,7 @@ actuation localoptimizer::step() if ( explore_knob_domain ){ optstepresult nmd_res = nmd.step(latest_measurements); + #ifdef DEBUG_MULTIOBJECTIVE_ std::cout << "[LOCALOPTIMIZER|DEBUG] New Vertex to try:"; std::cout << " Threads = " << nmd_res.threads; @@ -311,6 +287,11 @@ actuation localoptimizer::step() act.delta_threads = nmd_res.threads; act.frequency_idx = nmd_res.freq_idx; } + + act.delta_threads *= threads_dt; + std::cout << "[LOCALOPTIMIZER|DEBUG] ACTUAL Vertex to try:"; + std::cout << " Threads = " << act.delta_threads; + std::cout << " Freq Idx = " << act.frequency_idx << std::endl; } } #endif // ALLSCALE_HAVE_CPUFREQ diff --git a/src/components/nmsimplex_bbincr.cpp b/src/components/nmsimplex_bbincr.cpp index 414382c..c878199 100644 --- a/src/components/nmsimplex_bbincr.cpp +++ b/src/components/nmsimplex_bbincr.cpp @@ -35,7 +35,7 @@ NelderMead::NelderMead(double eps) std::cout << "[NelderMead|INFO] Initial Convergence Threshold set is " << EPSILON << std::endl; #endif itr = 0; - state_ = start; + state_ = warmup; /* dynamically allocate arrays */ @@ -46,6 +46,8 @@ 
NelderMead::NelderMead(double eps) ve = (double *)malloc(n * sizeof(double)); vc = (double *)malloc(n * sizeof(double)); vm = (double *)malloc(n * sizeof(double)); + + warming_up_step = 0; /* allocate the columns of the arrays */ for (i = 0; i <= n; i++) @@ -54,6 +56,143 @@ NelderMead::NelderMead(double eps) } } +std::pair NelderMead::explore_next_extra(double *extra, int level, + direction dir, + int level_max, int level_nested_max) +{ + /* + const char *to_string[] = { + "up", "up_final", "down", "left", "right", "right_final" + }; + */ + if ( extra[0] == 0.0 && extra[1] == 0.0 ) { + extra[1] = 1.0; + + return std::make_pair(level, dir); + } + switch (dir) { + case (direction::up): + if ( extra[1] < level ) { + extra[1] += 1.; + } else if( extra[0] < level_nested_max ) { + extra[0] += 1.; + dir = direction::right; + } else { + level ++; + } + break; + + case (direction::up_final): + if ( extra[1] < level ) { + extra[1] += 1.; + } else if( extra[0] < level_nested_max ) { + extra[0] += 1.; + dir = direction::right_final; + } else { + level ++; + } + break; + + + case (direction::down): + if ( extra[1] > -level ) { + extra[1] -= 1.0; + } else if ( extra[0] > -level_nested_max ){ + extra[0] -= 1.0; + dir = direction::left; + } + break; + + case (direction::left): + if ( extra[0] > -level_nested_max ) { + extra[0] -= 1.0; + } else if (extra[1] < level ) { + extra[1] += 1.0; + dir = direction::up_final; + } + break; + + case (direction::right): + if ( extra[0] < level_nested_max ) { + extra[0] += 1.; + } else if ( extra[1] <= level ) { + extra[1] -= 1.; + dir = direction::down; + } + break; + + case (direction::right_final): + if ( extra[0] < 0. 
) { + extra[0] += 1.; + } else { + level ++; + extra[0] = 0.0; + extra[1] = level; + dir = direction::right; + } + break; + } + + return std::make_pair(level, dir); +} + +template +void NelderMead::generate_new(F &gen) +{ + double extra[] = {0, 0}; + double *new_set; + int i = 0; + int max_combinations = (constraint_max[0] - constraint_min[0]+1) + * (constraint_max[1] - constraint_min[1]+1); + int level = 1; + int max_nested_level = constraint_max[1] - constraint_min[1] +1; + int max_level = constraint_max[0] - constraint_min[0] +1; + direction dir = direction::right; + + // VV: Search for a twice as big space to take into account that + // new_set is not *actually* at 0, 0 + + max_level *= 2; + max_nested_level *=2; + + int is_same; + do + { + new_set = gen(extra); + + auto key = std::make_pair((int)new_set[0], (int)new_set[1]); + auto entry = cache_.find(key); + is_same = (entry != cache_.end()); + + if ( ( level < max_level +1) + && is_same + && max_combinations > (NMD_NUM_KNOBS + 1)) + { + # if 0 + extra[0] = rand() % (int)(constraint_max[0] - constraint_min[0]) + + (int)constraint_min[0] + - (int)(0.5 * (constraint_max[0] - constraint_min[0])); + + extra[1] = rand() % (int)(constraint_max[1] - constraint_min[1]) + + (int)constraint_min[1] + - (int)(0.5 * (constraint_max[1] - constraint_min[1])); + #else + auto logistics = explore_next_extra(extra, level, dir, + max_level, max_nested_level); + level = logistics.first; + dir = logistics.second; + + #endif + OUT_DEBUG( + std::cout << "[NelderMead|Debug] Rejecting " + << new_set[0] << " " << new_set[1] << std::endl; + ) + } else { + break; + } + } while ( 1 ); +} + void NelderMead::my_constraints(double x[]) { // round to integer and bring again with allowable margins @@ -126,7 +265,7 @@ bool NelderMead::cache_update(int threads, int freq_idx, double NelderMead::evaluate_score(const double objectives[], const double *weights) const { - double score = 0.0f; + double score; // VV: [time, energy/power, resources] 
double scale[] = {1.0, 1000.0, 1.0}; scale[2] = (double)constraint_max[0]; @@ -134,12 +273,19 @@ double NelderMead::evaluate_score(const double objectives[], const double *weigh if (weights == nullptr) weights = opt_weights; + #if 0 + score = 0.0; for (auto i = 0; i < NMD_NUM_OBJECTIVES; ++i) { double t = objectives[i] / scale[i]; score += t * t * weights[i]; } - + #else + score = 0.0; + for ( auto i=0; isecond.objectives[j] = objectives[i][j]; - - past_entry->second._cache_timestamp = timestamp_now; - // VV: Skip attempting to re-insert the "same" entry - continue; - } - - // VV: If we've reached this point we need to add the entry to the cache - for (j = 0; j < NMD_NUM_OBJECTIVES; ++j) - entry.objectives[j] = objectives[i][j]; - - entry._cache_timestamp = timestamp_now; - entry._cache_expires_dt = CACHE_EXPIRE_AFTER_MS; - - cache_.insert(std::make_pair(key, entry)); - } + state_ = warmup; itr = 0; - - state_ = start; + warming_up_step = 0; } /* print out the initial values */ @@ -326,68 +429,31 @@ void NelderMead::sort_vertices() } } -bool NelderMead::knob_set_exists(double knobs[2], int exclude) -{ - int is_same; - - for (auto i=0; i double* { + for (j = 0; j < NMD_NUM_KNOBS; j++) vr[j] = vm[j] + ALPHA * (vm[j] - v[vg][j]) + extra[j]; - + my_constraints(vr); - - is_invalid = 0; - if ( max_combinations > NMD_NUM_KNOBS +1 ) { - is_invalid = knob_set_exists(vr, -1); - - if ( is_invalid ) { - extra[0] = rand() % (int)(constraint_max[0] - constraint_min[0]) - + (int) constraint_min[0] - - (int)(0.5*(constraint_max[0] - constraint_min[0])); - - extra[1] = rand() % (int)(constraint_max[1] - constraint_min[1]) - + (int) constraint_min[1] - - (int)(0.5*(constraint_max[1] - constraint_min[1])); - - } - } - - } while ( is_invalid ); + return vr; + }; + + generate_new(gen_new); #ifdef NMD_DEBUG_ std::cout << "[NelderMead DEBUG] Reflection Parameter = (" @@ -450,38 +516,16 @@ optstepresult NelderMead::do_step_reflect(const double objectives[]) else if (fr < f[vs]) { // VV: 
REFLECTED is better than BEST - - double extra[2] = {0.0, 0.0}; - int is_invalid = 0; - int max_combinations = 0; - - max_combinations = (constraint_max[0] - constraint_min[0]+1) * (constraint_max[1] - constraint_min[1]+1); - - // VV: Try not to pick a knob_set that already exists in `v` - do { + auto gen_new = [this](double *extra) mutable -> double* { for (j = 0; j < NMD_NUM_KNOBS; j++) ve[j] = vm[j] + GAMMA * (vr[j] - vm[j]) + extra[j]; - + my_constraints(ve); - - is_invalid = 0; - - if ( max_combinations > NMD_NUM_KNOBS +1 ) { - is_invalid = knob_set_exists(ve, -1); - - if ( is_invalid ) { - extra[0] = rand() % (int)(constraint_max[0] - constraint_min[0]) - + (int) constraint_min[0] - - (int)(0.5*(constraint_max[0] - constraint_min[0])); - - extra[1] = rand() % (int)(constraint_max[1] - constraint_min[1]) - + (int) constraint_min[1] - - (int)(0.5*(constraint_max[1] - constraint_min[1])); - - } - } - - } while ( is_invalid ); + + return ve; + }; + + generate_new(gen_new); // VV: Now evaluate EXPANDED res.threads = ve[0]; @@ -509,38 +553,17 @@ optstepresult NelderMead::do_step_reflect(const double objectives[]) else if ((f[vh] <= fr) && (fr < f[vg])) { // VV: REFLECTED between SECOND BEST and WORST - double extra[2] = {0.0, 0.0}; - int is_invalid = 0; - int max_combinations = 0; - - max_combinations = (constraint_max[0] - constraint_min[0]+1) * (constraint_max[1] - constraint_min[1]+1); - - // VV: Try not to pick a knob_set that already exists in `v` - do { + auto gen_new = [this](double *extra) mutable -> double* { for (j = 0; j < NMD_NUM_KNOBS; j++) vc[j] = vm[j] + BETA * (vr[j] - vm[j]) + extra[j]; - + my_constraints(vc); - - is_invalid = 0; - - if ( max_combinations > NMD_NUM_KNOBS +1 ) { - is_invalid = knob_set_exists(vc, -1); - - if ( is_invalid ) { - extra[0] = rand() % (int)(constraint_max[0] - constraint_min[0]) - + (int) constraint_min[0] - - (int)(0.5*(constraint_max[0] - constraint_min[0])); - - extra[1] = rand() % (int)(constraint_max[1] - 
constraint_min[1]) - + (int) constraint_min[1] - - (int)(0.5*(constraint_max[1] - constraint_min[1])); - - } - } - - } while ( is_invalid ); + return vc; + }; + + generate_new(gen_new); + // VV: Now evaluate EXPANDED res.threads = vc[0]; res.freq_idx = vc[1]; @@ -567,37 +590,16 @@ optstepresult NelderMead::do_step_reflect(const double objectives[]) else { // VV: REFLECTED worse than WORST - double extra[2] = {0.0, 0.0}; - int is_invalid = 0; - int max_combinations = 0; - - max_combinations = (constraint_max[0] - constraint_min[0]+1) * (constraint_max[1] - constraint_min[1]+1); - - // VV: Try not to pick a knob_set that already exists in `v` - do { + auto gen_new = [this](double *extra) mutable -> double* { for (j = 0; j < NMD_NUM_KNOBS; j++) vc[j] = vm[j] - BETA * (vr[j] - vm[j]) + extra[j]; - + my_constraints(vc); - - is_invalid = 0; - - if ( max_combinations > NMD_NUM_KNOBS +1 ) { - is_invalid = knob_set_exists(vc, -1); - - if ( is_invalid ) { - extra[0] = rand() % (int)(constraint_max[0] - constraint_min[0]) - + (int) constraint_min[0] - - (int)(0.5*(constraint_max[0] - constraint_min[0])); - - extra[1] = rand() % (int)(constraint_max[1] - constraint_min[1]) - + (int) constraint_min[1] - - (int)(0.5*(constraint_max[1] - constraint_min[1])); - - } - } - - } while ( is_invalid ); + + return vc; + }; + + generate_new(gen_new); // VV: Now evaluate EXPANDED res.threads = vc[0]; @@ -686,37 +688,17 @@ optstepresult NelderMead::do_step_contract(const double objectives[]) { // VV: Replace SECOND BEST double new_vh[NMD_NUM_KNOBS]; - double extra[NMD_NUM_KNOBS] = {0.0, 0.0}; - int is_invalid = 0; - int max_combinations = 0; - - max_combinations = (constraint_max[0] - constraint_min[0]+1) * (constraint_max[1] - constraint_min[1]+1); - - // VV: Try not to pick a knob_set that already exists in `v` - do { - for (j = 0; j < NMD_NUM_KNOBS; j++) + + auto gen_new = [this, &new_vh](double *extra) mutable -> double* { + for (auto j = 0; j < NMD_NUM_KNOBS; j++) new_vh[j] = v[vs][j] 
+ DELTA * (v[vh][j] - v[vs][j]) + extra[j]; - + my_constraints(new_vh); - - is_invalid = 0; - - if ( max_combinations > NMD_NUM_KNOBS +1 ) { - is_invalid = knob_set_exists(new_vh, -1); - - if ( is_invalid ) { - extra[0] = rand() % (int)(constraint_max[0] - constraint_min[0]) - + (int) constraint_min[0] - - (int)(0.5*(constraint_max[0] - constraint_min[0])); - - extra[1] = rand() % (int)(constraint_max[1] - constraint_min[1]) - + (int) constraint_min[1] - - (int)(0.5*(constraint_max[1] - constraint_min[1])); - - } - } - - } while ( is_invalid ); + + return new_vh; + }; + + generate_new(gen_new); for (j = 0; j < NMD_NUM_KNOBS; j++) v[vh][j] = new_vh[j]; @@ -769,15 +751,69 @@ optstepresult NelderMead::step(const double objectives[]) optstepresult res; res.threads = 0; res.freq_idx = -1; - std::cout << "Starting step with " - << objectives[0] << " " - << objectives[1] << " " - << objectives[2] << std::endl; + OUT_DEBUG( + std::cout << "[NelderMead|DEBUG] Starting step with " + << objectives[0] << " " + << objectives[1] << " " + << objectives[2] << std::endl; + ) + std::size_t tested_combinations = cache_.size(); + switch (state_) { + case warmup: + { + #ifdef NMD_DEBUG_ + std::cout << "[NelderMead|DEBUG] State = Warmup " + << warming_up_step << std::endl; + #endif + if ( warming_up_step > 0 ) { + // VV: Record results of last warming up step + f[warming_up_step-1] = evaluate_score(objectives, nullptr); + cache_update(v[warming_up_step-1][0], v[warming_up_step-1][1], + objectives, true); + } + if ( warming_up_step == NMD_NUM_KNOBS + 1) { + // VV: We need not explore the knob_set space anymore + state_ = start; + return step(objectives); + } + + // VV: Start at 25% threads with lowest CPU Freq, then 75% threads with max freq + // and 100% threads with max freq + + int threads_low = round(0.25 * (constraint_max[0] - constraint_min[1]) + + constraint_min[1]); + int threads_med = round(0.75 * (constraint_max[0] - constraint_min[1]) + + constraint_min[1]); + int threads_max 
= constraint_max[0]; + + const int initial_configurations[NMD_NUM_KNOBS+1][NMD_NUM_KNOBS] = { + {threads_low, (int)constraint_min[1]}, + {threads_med, (int)constraint_max[1]}, + {threads_max, (int)constraint_max[1]}, + }; + + optstepresult res; + res.objectives[0] = -1; + res.objectives[1] = -1; + res.objectives[2] = -1; + res.converged = false; + res.score = -1; + res.threads = initial_configurations[warming_up_step][0]; + res.freq_idx = initial_configurations[warming_up_step][1]; + + v[warming_up_step][0] = res.threads; + v[warming_up_step][1] = res.freq_idx; + warming_up_step++; + + return res; + } + break; case start: + itr++; res = do_step_start(); break; case reflection: @@ -798,14 +834,17 @@ optstepresult NelderMead::step(const double objectives[]) return res; } - res.converged = testConvergence(); + res.converged = testConvergence(tested_combinations); if (res.converged == true) { res.threads = v[vs][0]; res.freq_idx = v[vs][1]; - std::cout << "Converged to " << res.threads << " " << res.freq_idx << std::endl; + OUT_DEBUG( + std::cout << "[NelderMead|DEBUG] Converged to " << res.threads << " " << res.freq_idx << std::endl; + ) } + std::cout << "Stop step with " << objectives[0] << " " << objectives[1] << " " @@ -814,7 +853,7 @@ optstepresult NelderMead::step(const double objectives[]) return res; } -bool NelderMead::testConvergence() +bool NelderMead::testConvergence(std::size_t tested_combinations) { double temp; #if 0 @@ -852,12 +891,32 @@ bool NelderMead::testConvergence() std::cout << "[NelderMead|INFO] Convergence Ratio is " << s << std::endl; std::cout << "[NelderMead|INFO] Convergence Threshold set is " << EPSILON << std::endl; #endif - if (s >= EPSILON && itr <= MAXITERATIONS) + int max_combinations = (constraint_max[0] - constraint_min[0]+1) + * (constraint_max[1] - constraint_min[1]+1); + + if ( (s >= EPSILON) + && (itr <= MAXITERATIONS) + && (max_combinations != tested_combinations) ) return false; else { sort_vertices(); min = f[vs]; + + 
OUT_DEBUG( + std::cout << "[NelderMead|Debug] Cache_ Max: " << max_combinations + << " explored " << tested_combinations << std::endl; + for (const auto &entry: cache_ ) { + std::cout << "[NelderMead|Debug] Cache_ " + << entry.second.threads << " " + << entry.second.freq_idx << " :: " + << entry.second.objectives[0] << " " + << entry.second.objectives[1] << " " + << entry.second.objectives[2] << " :: " + << evaluate_score(entry.second.objectives, nullptr) << std::endl; + } + ) + return true; } } diff --git a/src/components/scheduler_component.cpp b/src/components/scheduler_component.cpp index 3b4aaba..cb5d936 100644 --- a/src/components/scheduler_component.cpp +++ b/src/components/scheduler_component.cpp @@ -804,7 +804,7 @@ void scheduler::optimize_locally(work_item const& work) elapsedTimeMs = t_duration_now - last_optimization_timestamp_; - if (elapsedTimeMs > optimization_period_ms){ + if (elapsedTimeMs > optimization_period_ms || nr_opt_steps == 0){ last_optimization_timestamp_= t_duration_now; nr_opt_steps++; actuation act_temp = lopt_.step(); From 755f0b6fbdd6f36e788fceebd19abf11740f6e31 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Wed, 14 Nov 2018 16:43:01 +0000 Subject: [PATCH 07/37] Default to highest CPU if using CPUFREQ Do a final run once NMD converges to make sure that the scores of the simplices are not stale --- allscale/components/localoptimizer.hpp | 389 ++++++++++++----------- allscale/components/nmsimplex_bbincr.hpp | 3 + src/components/localoptimizer.cpp | 19 ++ src/components/nmsimplex_bbincr.cpp | 61 +++- src/components/scheduler_component.cpp | 12 +- 5 files changed, 285 insertions(+), 199 deletions(-) diff --git a/allscale/components/localoptimizer.hpp b/allscale/components/localoptimizer.hpp index 4b2d1ce..d59bf16 100644 --- a/allscale/components/localoptimizer.hpp +++ b/allscale/components/localoptimizer.hpp @@ -19,243 +19,270 @@ #define DEBUG_ 1 #define DEBUG_MULTIOBJECTIVE_ 1 -namespace allscale { namespace components { - - 
enum objectiveType {time, energy, resource}; - - enum parameterType {thread, frequency}; - - enum searchPolicy {allscale, random, manual}; - - /* structure type of a single optimization objective */ - struct objective{ - double last_scores[3]; - - objectiveType type; - /* leeway threshold desired, 0-1 double */ - double leeway; - /* non-negative integer priority of the objective, 0 is highest priority*/ - int priority; - /* local minimum during single objective optimization */ - double localmin; - /* local maximum during single objective optimization */ - double localmax; - /* local minimum during single objective optimization */ - double globalmin; - /* local minimum during single objective optimization */ - double globalmax; - /* current deviation of the objective value from observed min */ - double currentthreshold; - /* sampled objective values throughout execution */ - std::vector samples; - /* thread number that lead to the objective value in samples vector */ - std::vector threads_samples; - /* frequency index that lead to the objective value in samples vector */ - std::vector freq_samples; - /* true if optimization of objective has converged, false otherwise */ - bool converged; - /* true if optimizer for objective has been initialized, false otherwise */ - bool initialized; - /* index to the parameter vectors for setup that has so far achieved +namespace allscale +{ +namespace components +{ + +enum objectiveType +{ + time, + energy, + resource +}; + +enum parameterType +{ + thread, + frequency +}; + +enum searchPolicy +{ + allscale, + random, + manual +}; + +/* structure type of a single optimization objective */ +struct objective +{ + double last_scores[3]; + + objectiveType type; + /* leeway threshold desired, 0-1 double */ + double leeway; + /* non-negative integer priority of the objective, 0 is highest priority*/ + int priority; + /* local minimum during single objective optimization */ + double localmin; + /* local maximum during single objective 
optimization */ + double localmax; + /* local minimum during single objective optimization */ + double globalmin; + /* local minimum during single objective optimization */ + double globalmax; + /* current deviation of the objective value from observed min */ + double currentthreshold; + /* sampled objective values throughout execution */ + std::vector samples; + /* thread number that lead to the objective value in samples vector */ + std::vector threads_samples; + /* frequency index that lead to the objective value in samples vector */ + std::vector freq_samples; + /* true if optimization of objective has converged, false otherwise */ + bool converged; + /* true if optimizer for objective has been initialized, false otherwise */ + bool initialized; + /* index to the parameter vectors for setup that has so far achieved the minimum over all samples */ - long int min_params_idx; - double converged_minimum; - double minimization_params[2]; - }; + long int min_params_idx; + double converged_minimum; + double minimization_params[2]; +}; - - /* structure type modelling an optimization actuation action to be taken +/* structure type modelling an optimization actuation action to be taken by the scheduler */ - struct actuation{ - /* number of threads to resume (>0) or suspend (<0). If set to zero, +struct actuation +{ + /* number of threads to resume (>0) or suspend (<0). If set to zero, number of threads will stay unchanged. */ - unsigned int delta_threads; + unsigned int delta_threads; #if defined(ALLSCALE_HAVE_CPUFREQ) - /* index to the global cpu-supported frequencies vector pointing to + /* index to the global cpu-supported frequencies vector pointing to the new frequency to be set. 
If set to -1, frequency will stay unchanged */ - int frequency_idx; - int previous_frequency_idx; + int frequency_idx; + int previous_frequency_idx; #endif - }; - - struct localoptimizer - { - localoptimizer() - :nmd(convergence_threshold_), - pending_threads(0.), - pending_energy(0.), - pending_time(0.), - pending_num_times(0.), - mo_initialized(false), +}; + +struct localoptimizer +{ + localoptimizer() + : pending_threads(0.), + pending_energy(0.), + pending_time(0.), + pending_num_times(0.), + mo_initialized(false), #if defined(ALLSCALE_HAVE_CPUFREQ) - frequency_param_(0), + frequency_param_(0), #endif - current_objective_idx_(0),converged_(false) - { - if (optmethod_==random) - srand (std::time(NULL)); - } - - localoptimizer(std::list); - - bool isConverged(){return converged_;} - - void setPolicy(searchPolicy pol){ - optmethod_ = pol; + current_objective_idx_(0), + converged_(false), + convergence_threshold_(0.01), + nmd(0.01) + { + if (optmethod_ == random) + srand(std::time(NULL)); + } + localoptimizer(std::list); + + bool isConverged(); + + void setPolicy(searchPolicy pol) + { + optmethod_ = pol; #ifdef DEBUG_ - std::cout << "Local Optimizer Initialized with " - << policyToString(pol) - << " policy for multi-objective search." - << std::endl; + std::cout << "Local Optimizer Initialized with " + << policyToString(pol) + << " policy for multi-objective search." 
+ << std::endl; #endif - } + } #ifdef ALLSCALE_HAVE_CPUFREQ - void initialize_nmd(); + void initialize_nmd(); #endif - double opt_weights[NMD_NUM_OBJECTIVES]; + double opt_weights[NMD_NUM_OBJECTIVES]; - searchPolicy getPolicy(){return optmethod_;} + searchPolicy getPolicy() { return optmethod_; } - void setobjectives(std::list); + void setobjectives(std::list); - std::size_t getCurrentThreads(){return threads_param_;} + std::size_t getCurrentThreads() { return threads_param_; } - void setCurrentThreads(std::size_t threads){threads_param_ = threads;} + void setCurrentThreads(std::size_t threads) { threads_param_ = threads; } #if defined(ALLSCALE_HAVE_CPUFREQ) - unsigned int getCurrentFrequencyIdx(){return frequency_param_;} - - void setCurrentFrequencyIdx(unsigned int idx){frequency_param_ = idx;} - - const std::vector - setfrequencies(std::vector frequencies){ - frequencies_param_allowed_=frequencies; - //std::cout << "**************** = " << frequency_param_ << std::endl; - //for(auto& el: frequencies_param_allowed_) - // std::cout << "***>>>> " << el << std::endl; - return frequencies_param_allowed_; - } + unsigned int getCurrentFrequencyIdx() + { + return frequency_param_; + } + + void setCurrentFrequencyIdx(unsigned int idx) { frequency_param_ = idx; } + + const std::vector + setfrequencies(std::vector frequencies) + { + frequencies_param_allowed_ = frequencies; + //std::cout << "**************** = " << frequency_param_ << std::endl; + //for(auto& el: frequencies_param_allowed_) + // std::cout << "***>>>> " << el << std::endl; + return frequencies_param_allowed_; + } #endif - std::size_t getmaxthreads() { - return max_threads_; - } + std::size_t getmaxthreads() + { + return max_threads_; + } - void setmaxthreads(std::size_t threads); + void setmaxthreads(std::size_t threads); - /* executes one step of multi-objective optimization */ - actuation step(); + /* executes one step of multi-objective optimization */ + actuation step(); - /* adds a measurement sample 
to the specified objective */ - void measureObjective(double iter_time, double power, double threads); + /* adds a measurement sample to the specified objective */ + void measureObjective(double iter_time, double power, double threads); - /* restarts multi-objective optimization from current best solution */ - void reset(int,int); + /* restarts multi-objective optimization from current best solution */ + void reset(int, int); #ifdef DEBUG_ - void printobjectives(); - void printverbosesteps(actuation); + void printobjectives(); + void printverbosesteps(actuation); #endif - std::string policyToString(searchPolicy pol){ - std::string str; - switch (pol){ - case random: - str = "random"; - break; - case allscale: - str = "allscale"; - break; - case manual: - str = "manual"; - break; - } - return str; - } - - private: - // VV: Used to convert thread_idx to actual number of threads - std::size_t threads_dt; - - void accumulate_objective_measurements(); - void reset_accumulated_measurements(); - - std::vector samples_energy; - std::vector samples_time; - std::vector samples_threads; - std::vector samples_freq; - - bool explore_knob_domain; - - double pending_time, pending_energy, pending_threads; - unsigned long pending_num_times; - - bool mo_initialized; - - /* vector of active optimization objectives. 
Objectives are stored + std::string policyToString(searchPolicy pol) + { + std::string str; + switch (pol) + { + case random: + str = "random"; + break; + case allscale: + str = "allscale"; + break; + case manual: + str = "manual"; + break; + } + return str; + } + + private: + // VV: Used to convert thread_idx to actual number of threads + std::size_t threads_dt; + + void accumulate_objective_measurements(); + void reset_accumulated_measurements(); + + std::vector samples_energy; + std::vector samples_time; + std::vector samples_threads; + std::vector samples_freq; + + bool explore_knob_domain; + + double pending_time, pending_energy, pending_threads; + unsigned long pending_num_times; + + bool mo_initialized; + + /* vector of active optimization objectives. Objectives are stored in the vector in decreasing priority order */ - std::vector objectives_; + std::vector objectives_; - NelderMead nmd; + NelderMead nmd; - /* counts number of parameter changes (as pair) */ - unsigned long long int param_changes_; + /* counts number of parameter changes (as pair) */ + unsigned long long int param_changes_; - /* single objective optimization method used */ - searchPolicy optmethod_ = random; + /* single objective optimization method used */ + searchPolicy optmethod_ = random; - /* active optimization parameter - nr of OS threads active */ - int threads_param_; + /* active optimization parameter - nr of OS threads active */ + int threads_param_; - /* ordered set of OS thread values that have been assigned to the + /* ordered set of OS thread values that have been assigned to the runtime by the optimization algorithm. 
The most recent value is stored at the end of the vector */ - std::vector thread_param_values_; + std::vector thread_param_values_; - /* maximum number of OS threads supported by the runtime */ - std::size_t max_threads_; + /* maximum number of OS threads supported by the runtime */ + std::size_t max_threads_; #if defined(ALLSCALE_HAVE_CPUFREQ) - /* active optimization parameter - current CPU frequency index */ - unsigned int frequency_param_; + /* active optimization parameter - current CPU frequency index */ + unsigned int frequency_param_; - /* ordered set of frequency values that the CPU has been set to by + /* ordered set of frequency values that the CPU has been set to by the optimization algorithm. The most recent value is stored at the end of the vector */ - std::vector frequency_param_values_; + std::vector frequency_param_values_; - /* vector containing sorted list of frequencies supported by the + /* vector containing sorted list of frequencies supported by the processor */ - std::vector frequencies_param_allowed_; + std::vector frequencies_param_allowed_; #endif - /* threshold (percentage in [0,1]) to decide convergence of optimization + /* threshold (percentage in [0,1]) to decide convergence of optimization steps */ - const double convergence_threshold_ = 0.01; + double convergence_threshold_; - /***** optimization state variables ******/ + /***** optimization state variables ******/ - /* index to the _objectives vector of currently optimized objective */ - unsigned short int current_objective_idx_; + /* index to the _objectives vector of currently optimized objective */ + unsigned short int current_objective_idx_; - /* number of times the optimizer step() has been invoked, this is for + /* number of times the optimizer step() has been invoked, this is for init and housekeeping purposes */ - unsigned long long int steps_; + unsigned long long int steps_; - /* currently optimized parameter */ - parameterType current_param_; + /* currently optimized 
parameter */ + parameterType current_param_; - /* initial warm-up steps */ - const unsigned int warmup_steps_=3; + /* initial warm-up steps */ + const unsigned int warmup_steps_ = 3; - /* maximum number of optimization steps allowed */ - const int max_steps_=100; + /* maximum number of optimization steps allowed */ + const int max_steps_ = 100; - /* set to true if local optimizer has converged over all objectives */ - bool converged_; - }; -} -} + /* set to true if local optimizer has converged over all objectives */ + bool converged_; +}; +} // namespace components +} // namespace allscale #endif diff --git a/allscale/components/nmsimplex_bbincr.hpp b/allscale/components/nmsimplex_bbincr.hpp index 2674517..58844d9 100644 --- a/allscale/components/nmsimplex_bbincr.hpp +++ b/allscale/components/nmsimplex_bbincr.hpp @@ -163,6 +163,9 @@ class NelderMead return rnum; } + bool convergence_reevaluating; + int initial_configurations[NMD_NUM_KNOBS+1][NMD_NUM_KNOBS]; + /* vertex with smallest value */ int vs; diff --git a/src/components/localoptimizer.cpp b/src/components/localoptimizer.cpp index 14158fa..fc8d114 100644 --- a/src/components/localoptimizer.cpp +++ b/src/components/localoptimizer.cpp @@ -126,6 +126,24 @@ void localoptimizer::printobjectives() } } +bool localoptimizer::isConverged() +{ + #if 0 + if ( converged_ == false ) { + return false; + } + + auto timestamp_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + + if ( reexplore_every_ms >0 && timestamp_now - last_convergence_ts > reexplore_every_ms ) + { + std::cout << "[LOCALOPTIMIZER] Re-exploring space!" 
<< std::endl; + initialize_nmd(); + } + #endif + return converged_; +} + void localoptimizer::printverbosesteps(actuation act) { static int last_frequency_idx = 0; @@ -200,6 +218,7 @@ void localoptimizer::initialize_nmd() mo_initialized = true; explore_knob_domain = true; + converged_ = false; } #endif diff --git a/src/components/nmsimplex_bbincr.cpp b/src/components/nmsimplex_bbincr.cpp index c878199..4b51c71 100644 --- a/src/components/nmsimplex_bbincr.cpp +++ b/src/components/nmsimplex_bbincr.cpp @@ -48,6 +48,7 @@ NelderMead::NelderMead(double eps) vm = (double *)malloc(n * sizeof(double)); warming_up_step = 0; + convergence_reevaluating = false; /* allocate the columns of the arrays */ for (i = 0; i <= n; i++) @@ -320,6 +321,23 @@ void NelderMead::initialize_simplex(double weights[3], state_ = warmup; itr = 0; warming_up_step = 0; + convergence_reevaluating = false; + cache_.clear(); + + int threads_low = round(0.25 * (constraint_max[0] - constraint_min[1]) + + constraint_min[1]); + int threads_med = round(0.5 * (constraint_max[0] - constraint_min[1]) + + constraint_min[1]); + int threads_high = constraint_max[0] * 0.75; + + initial_configurations[0][0] = threads_low; + initial_configurations[0][1] = (int)constraint_min[1]; + + initial_configurations[1][0] = threads_med; + initial_configurations[1][1] = (int)constraint_max[1]; + + initial_configurations[2][0] = threads_high; + initial_configurations[2][1] = (int)constraint_max[1]; } /* print out the initial values */ @@ -781,21 +799,6 @@ optstepresult NelderMead::step(const double objectives[]) return step(objectives); } - // VV: Start at 25% threads with lowest CPU Freq, then 75% threads with max freq - // and 100% threads with max freq - - int threads_low = round(0.25 * (constraint_max[0] - constraint_min[1]) - + constraint_min[1]); - int threads_med = round(0.75 * (constraint_max[0] - constraint_min[1]) - + constraint_min[1]); - int threads_max = constraint_max[0]; - - const int 
initial_configurations[NMD_NUM_KNOBS+1][NMD_NUM_KNOBS] = { - {threads_low, (int)constraint_min[1]}, - {threads_med, (int)constraint_max[1]}, - {threads_max, (int)constraint_max[1]}, - }; - optstepresult res; res.objectives[0] = -1; res.objectives[1] = -1; @@ -872,6 +875,7 @@ bool NelderMead::testConvergence(std::size_t tested_combinations) return true; } #endif + bool ret = false; fsum = 0.0; for (auto j = 0; j <= n; j++) @@ -897,7 +901,7 @@ bool NelderMead::testConvergence(std::size_t tested_combinations) if ( (s >= EPSILON) && (itr <= MAXITERATIONS) && (max_combinations != tested_combinations) ) - return false; + ret = false; else { sort_vertices(); @@ -917,7 +921,32 @@ bool NelderMead::testConvergence(std::size_t tested_combinations) } ) + ret = true; + } + + if ( ret == true && convergence_reevaluating == true ) { return true; + } else if ( ret == true ) { + // VV: Do another final run to make sure that the objective scores still hold up + OUT_DEBUG ( + std::cout << "[NelderMead|Debug] Doing another final search" << std::endl; + ) + state_ = warmup; + warming_up_step = 0; + itr --; + convergence_reevaluating = true; + + for (auto i=0; i Date: Thu, 15 Nov 2018 10:08:55 +0000 Subject: [PATCH 08/37] Improved Power and Active Thread count logging when measuring statistics --- allscale/components/localoptimizer.hpp | 19 ++++ allscale/components/scheduler.hpp | 5 +- src/components/localoptimizer.cpp | 1 + src/components/nmsimplex_bbincr.cpp | 71 ++++++++++++- src/components/scheduler_component.cpp | 142 ++++++++++++------------- 5 files changed, 159 insertions(+), 79 deletions(-) diff --git a/allscale/components/localoptimizer.hpp b/allscale/components/localoptimizer.hpp index d59bf16..cfc43b5 100644 --- a/allscale/components/localoptimizer.hpp +++ b/allscale/components/localoptimizer.hpp @@ -155,6 +155,25 @@ struct localoptimizer const std::vector setfrequencies(std::vector frequencies) { + #if 1 + const std::size_t max_freqs = 10; + std::size_t keep_every = 
(std::size_t) ceilf(frequencies.size() / (float) max_freqs); + + if ( keep_every > 1 ) { + std::vector new_freqs; + + int i, j, len; + + for (j=0, i=0, len=frequencies.size(); isecond.threads = profiled_threads; + } + vr[0] = profiled_threads; + } + + fr = evaluate_score(objectives, opt_weights); + if ((f[vs] <= fr) && (fr < f[vh])) { // VV: REFLECTED point is better than the SECOND BEST @@ -650,6 +666,22 @@ optstepresult NelderMead::do_step_expand(const double objectives[]) #endif fe = evaluate_score(objectives, nullptr); + // VV: Make sure that we actually profiled what we meant to + int profiled_threads = objectives[2]; + + if ( (int) ve[0] != profiled_threads ) { + std::cout << "[NelderMead|WARN] Meant to profile " << ve[0] << " threads " + "but ended up using " << profiled_threads << std::endl; + + auto key = std::make_pair((int)ve[0], (int)ve[1]); + auto iter = cache_.find(key); + if ( iter != cache_.end() ) { + iter->second.threads = profiled_threads; + } + + ve[0] = profiled_threads; + } + if (fe < fr) { // VV: EXPANDED point is better than REFLECTIVE @@ -686,6 +718,22 @@ optstepresult NelderMead::do_step_contract(const double objectives[]) #endif fc = evaluate_score(objectives, nullptr); + // VV: Make sure that we actually profiled what we meant to + int profiled_threads = objectives[2]; + + if ( (int) vc[0] != profiled_threads ) { + std::cout << "[NelderMead|WARN] Meant to profile " << vc[0] << " threads " + "but ended up using " << profiled_threads << std::endl; + + auto key = std::make_pair((int)vc[0], (int)vc[1]); + auto iter = cache_.find(key); + if ( iter != cache_.end() ) { + iter->second.threads = profiled_threads; + } + + vc[0] = profiled_threads; + } + if (fc <= fr) { // VV: CONTRACTED_O is better than REFLECTED @@ -754,6 +802,16 @@ optstepresult NelderMead::do_step_shrink(const double objectives[]) #endif f[vh] = evaluate_score(objectives, nullptr); + // VV: Make sure that we actually profiled what we meant to + int profiled_threads = 
objectives[2]; + + if ( (int) v[vh][0] != profiled_threads ) { + std::cout << "[NelderMead|WARN] Meant to profile " << v[vh][0] << " threads " + "but ended up using " << profiled_threads << std::endl; + + v[vh][0] = profiled_threads; + } + const int threads = (int)(v[vh][0]); const int freq_idx = (int)(v[vh][1]); @@ -786,10 +844,18 @@ optstepresult NelderMead::step(const double objectives[]) std::cout << "[NelderMead|DEBUG] State = Warmup " << warming_up_step << std::endl; #endif + // VV: Make sure that we actually profiled what we meant to + int profiled_threads = objectives[2]; + if ( warming_up_step > 0 ) { + if ( (int) v[warming_up_step-1][0] != profiled_threads ) { + std::cout << "[NelderMead|WARN] Meant to profile " << vr[0] << " threads " + "but ended up using " << profiled_threads << std::endl; + v[warming_up_step-1][0] = profiled_threads; + } // VV: Record results of last warming up step f[warming_up_step-1] = evaluate_score(objectives, nullptr); - cache_update(v[warming_up_step-1][0], v[warming_up_step-1][1], + cache_update(profiled_threads, v[warming_up_step-1][1], objectives, true); } @@ -935,6 +1001,7 @@ bool NelderMead::testConvergence(std::size_t tested_combinations) warming_up_step = 0; itr --; convergence_reevaluating = true; + cache_.clear(); for (auto i=0; i(std::chrono::system_clock::now()).time_since_epoch().count(); +// update_active_osthreads(0); +// #ifdef ALLSCALE_HAVE_CPUFREQ +// update_power_consumption(hardware_reconf::read_system_power(), 1); +// #endif #endif rp_ = &hpx::resource::get_partitioner(); @@ -496,25 +497,7 @@ void scheduler::init() { using hardware_reconf = allscale::components::util::hardware_reconf; auto freqs = hardware_reconf::get_frequencies(0); - const std::size_t max_freqs = 5; - std::size_t keep_every = (std::size_t) ceilf(freqs.size() / (float) max_freqs); - - if ( keep_every > 1 ) { - std::vector new_freqs; - - int i, j, len; - - for (j=0, i=0, len=freqs.size(); i freq_temp = - lopt_.setfrequencies(freqs); + auto 
freq_temp = lopt_.setfrequencies(freqs); if (freq_temp.empty()){ HPX_THROW_EXCEPTION(hpx::bad_request, "scheduler::init", "error in initializing the local optimizer, allowed frequency values are empty"); @@ -769,17 +752,33 @@ void scheduler::optimize_locally(work_item const& work) #endif #ifdef MEASURE_ -#ifdef ALLSCALE_HAVE_CPUFREQ std::size_t temp_id = work.id().id; if ((temp_id >= period_for_power) && (temp_id % period_for_power == 0)) - update_power_consumption(hardware_reconf::read_system_power()); + { + auto timestamp_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + auto dt = timestamp_now - last_measure_power; + + dt = dt > 0 ? dt : 1 ; + + last_measure_power = timestamp_now; + + update_active_osthreads(active_threads, dt); +#ifdef ALLSCALE_HAVE_CPUFREQ + allscale::components::monitor *monitor_c = &allscale::monitor::get(); + auto measurement = monitor_c->get_current_power(); + if ( measurement <= 10000 ) { + update_power_consumption(measurement, dt); + } #endif + } + #endif #ifdef ALLSCALE_HAVE_CPUFREQ if (uselopt && !lopt_.isConverged()) { last_power_usage++; - current_power_usage = hardware_reconf::read_system_power(); + allscale::components::monitor *monitor_c = &allscale::monitor::get(); + current_power_usage = monitor_c->get_current_power(); power_sum += current_power_usage; auto t_now = std::chrono::system_clock::now(); @@ -804,8 +803,7 @@ void scheduler::optimize_locally(work_item const& work) } lopt_.measureObjective(current_avg_iter_time,power_sum/last_power_usage, - // active_threads - lopt_.getCurrentThreads()); + active_threads); last_power_usage=0; power_sum=0; } @@ -1075,9 +1073,9 @@ unsigned int scheduler::suspend_threads(std::size_t suspendthreads) { std::cout << "total active PUs: " << active_threads_ << "\n"; #endif -#ifdef MEASURE_ - update_active_osthreads(active_threads_-active_threads); -#endif +// #ifdef MEASURE_ +// update_active_osthreads(active_threads_-active_threads); +// #endif 
active_threads = active_threads_; @@ -1140,9 +1138,9 @@ unsigned int scheduler::suspend_threads(std::size_t suspendthreads) { ) ); } -#ifdef MEASURE_ - update_active_osthreads(-1 * suspend_threads.size()); -#endif +// #ifdef MEASURE_ +// update_active_osthreads(-1 * suspend_threads.size()); +// #endif active_threads = active_threads - suspend_threads.size(); @@ -1261,9 +1259,9 @@ unsigned int scheduler::resume_threads(std::size_t resumethreads) { std::cout << "total active PUs: " << active_threads_ << "\n"; #endif -#ifdef MEASURE_ - update_active_osthreads(active_threads_-active_threads); -#endif +// #ifdef MEASURE_ +// update_active_osthreads(active_threads_-active_threads); +// #endif active_threads = active_threads_; // if no thread is suspended, nothing to do @@ -1320,9 +1318,9 @@ unsigned int scheduler::resume_threads(std::size_t resumethreads) { ) ); } -#ifdef MEASURE_ - update_active_osthreads(resume_threads.size()); -#endif +// #ifdef MEASURE_ +// update_active_osthreads(resume_threads.size()); +// #endif active_threads = active_threads + resume_threads.size(); #ifdef DEBUG_THREADSTATUS_ std::cout << "[SCHEDULER|INFO]: Thread Resume - Newly Active Threads: " << active_threads @@ -1441,51 +1439,31 @@ void scheduler::fix_allcores_frequencies(int frequency_idx){ #endif #ifdef MEASURE_ -void scheduler::update_active_osthreads(std::size_t delta) { - std::size_t temp = active_threads + delta; - if (meas_active_threads_max==0) - meas_active_threads_max=temp; +void scheduler::update_active_osthreads(std::size_t threads, int64_t delta_time) { - if (meas_active_threads_min==0) - meas_active_threads_min=temp; + if (meas_active_threads_max==0 || meas_active_threads_max < threads) + meas_active_threads_max=threads; - if (meas_active_threads_sum==0){ - meas_active_threads_count++; - meas_active_threads_sum=active_threads; - return; - } + if (meas_active_threads_min==0 || meas_active_threads_min > threads) + meas_active_threads_min=threads; - if ((temp >= min_threads) && 
(temp <= os_thread_count)){ - meas_active_threads_count++; - meas_active_threads_sum+=temp; - if (temp > meas_active_threads_max) - meas_active_threads_max=temp; - if (temp < meas_active_threads_min) - meas_active_threads_min=temp; - } + meas_active_threads_count += delta_time; + meas_active_threads_sum += active_threads * delta_time; } -void scheduler::update_power_consumption(std::size_t power_sample) { - if (meas_power_max==0) +void scheduler::update_power_consumption(std::size_t power_sample, int64_t delta_time) +{ + if (meas_power_max==0 || meas_power_max < power_sample) meas_power_max=power_sample; - if (meas_power_min==0) + if (meas_power_min==0 || meas_power_min > power_sample) meas_power_min=power_sample; - if (meas_power_sum==0){ - meas_power_count++; - meas_power_sum=power_sample; - return; - } - if (power_sample <= 10000){ - meas_power_count++; - meas_power_sum+=power_sample; - if (power_sample > meas_power_max) - meas_power_max=power_sample; - if (power_sample < meas_power_min) - meas_power_min=power_sample; - } + meas_power_count += delta_time; + meas_power_sum += power_sample * delta_time; + + std::cout << "Reporting Threads:" << active_threads << " Power:" << power_sample << " for Dt:" << delta_time << std::endl; } #endif @@ -1554,6 +1532,20 @@ void scheduler::stop() { /* Output all measured metrics */ #ifdef DEBUG_MULTIOBJECTIVE_ #ifdef MEASURE_ + auto timestamp_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + auto dt = timestamp_now - last_measure_power; + last_measure_power = timestamp_now; + + update_active_osthreads(active_threads, dt); +#ifdef ALLSCALE_HAVE_CPUFREQ + allscale::components::monitor *monitor_c = &allscale::monitor::get(); + + auto measurement = monitor_c->get_current_power(); + if ( measurement <= 10000 ) { + update_power_consumption(measurement, dt); + } +#endif + std::cout << "\n****************************************************\n" << std::flush; std::cout << "Measured Metrics 
of Application Execution:\n" From 6eca192d3717b93faf0ab21d5027189f1992a018 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Thu, 15 Nov 2018 14:41:36 +0000 Subject: [PATCH 09/37] Make monitor::get_current_power() threadsafe Removed freq_idx domain reduction - If we re-enable that feature we probably have to add a map that converts a "fake" freq_idx to the actual one --- allscale/components/localoptimizer.hpp | 3 ++- src/components/monitor_component.cpp | 22 ++++++++++++++-------- src/components/nmsimplex_bbincr.cpp | 5 +++-- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/allscale/components/localoptimizer.hpp b/allscale/components/localoptimizer.hpp index cfc43b5..e7a77eb 100644 --- a/allscale/components/localoptimizer.hpp +++ b/allscale/components/localoptimizer.hpp @@ -155,7 +155,7 @@ struct localoptimizer const std::vector setfrequencies(std::vector frequencies) { - #if 1 + #if 0 const std::size_t max_freqs = 10; std::size_t keep_every = (std::size_t) ceilf(frequencies.size() / (float) max_freqs); @@ -224,6 +224,7 @@ struct localoptimizer // VV: Used to convert thread_idx to actual number of threads std::size_t threads_dt; + void accumulate_objective_measurements(); void reset_accumulated_measurements(); diff --git a/src/components/monitor_component.cpp b/src/components/monitor_component.cpp index 947bac4..41570b1 100644 --- a/src/components/monitor_component.cpp +++ b/src/components/monitor_component.cpp @@ -338,19 +338,24 @@ namespace allscale { namespace components { /*VV: Read potentially multiple measurements of power within the span of POWER_MEASUREMENT_PERIOD_MS milliseconds. 
Each time this function is invoked it returns the running average of power.*/ - static unsigned long long times_read_power=1; - static unsigned long long power_sum = util::hardware_reconf::read_system_power(); + static mutex_type power_mtx; + static unsigned long long times_read_power=0; + static unsigned long long power_sum = 0ull; + static long timestamp_reset_power = 0; - static long timestamp_reset_power = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); - - long t_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + int64_t t_now, dt; + float ret; - auto dt = t_now - timestamp_reset_power; + std::lock_guard lock(power_mtx); + + t_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + + dt = t_now - timestamp_reset_power; times_read_power ++; power_sum += util::hardware_reconf::read_system_power(); - float ret = power_sum / (float)(times_read_power); + ret = power_sum / (float)(times_read_power); if ( dt >= POWER_MEASUREMENT_PERIOD_MS ) { times_read_power = 0; @@ -368,7 +373,8 @@ namespace allscale { namespace components { float monitor::get_max_power() { #if defined(ALLSCALE_HAVE_CPUFREQ) - return 0.0; + // VV: report 125.0 Watt ( this should be dynamically configured/discovered ) + return 1250.0; #elif defined(POWER_ESTIMATE) return allscale::power::estimate_power(get_max_freq(0)) * num_cpus_; #else diff --git a/src/components/nmsimplex_bbincr.cpp b/src/components/nmsimplex_bbincr.cpp index 94e004c..54530c8 100644 --- a/src/components/nmsimplex_bbincr.cpp +++ b/src/components/nmsimplex_bbincr.cpp @@ -849,8 +849,9 @@ optstepresult NelderMead::step(const double objectives[]) if ( warming_up_step > 0 ) { if ( (int) v[warming_up_step-1][0] != profiled_threads ) { - std::cout << "[NelderMead|WARN] Meant to profile " << vr[0] << " threads " - "but ended up using " << profiled_threads << std::endl; + std::cout << "[NelderMead|WARN] 
Meant to profile " + << v[warming_up_step-1] << " threads " + "but ended up using " << profiled_threads << std::endl; v[warming_up_step-1][0] = profiled_threads; } // VV: Record results of last warming up step From 55517075088ab126f37c76388f7c764b51df9461 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Thu, 15 Nov 2018 16:59:23 +0000 Subject: [PATCH 10/37] Modifying Score to more closely match the one that Dashboard expects --- src/components/monitor_component.cpp | 6 +++++- src/components/nmsimplex_bbincr.cpp | 2 +- src/dashboard.cpp | 4 +--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/components/monitor_component.cpp b/src/components/monitor_component.cpp index 594fc39..fde7877 100644 --- a/src/components/monitor_component.cpp +++ b/src/components/monitor_component.cpp @@ -397,7 +397,11 @@ namespace allscale { namespace components { float monitor::get_max_power() { #if defined(ALLSCALE_HAVE_CPUFREQ) - // VV: report 125.0 Watt ( this should be dynamically configured/discovered ) + // VV: report 1250 Watts + // ( redbox paper 5283 for 8335-GTA indicates 1875 for the + // whole node but I've noticed up to ~1100-1200 Watts, for + // the time being this is a good enough figure ) + // ( this should be dynamically configured/discovered ) return 1250.0; #elif defined(POWER_ESTIMATE) return allscale::power::estimate_power(get_max_freq(0)) * num_cpus_; diff --git a/src/components/nmsimplex_bbincr.cpp b/src/components/nmsimplex_bbincr.cpp index 54530c8..f206cc3 100644 --- a/src/components/nmsimplex_bbincr.cpp +++ b/src/components/nmsimplex_bbincr.cpp @@ -284,7 +284,7 @@ double NelderMead::evaluate_score(const double objectives[], const double *weigh #else score = 0.0; for ( auto i=0; iget_current_power(); -#elif defined(POWER_ESTIMATE) +#if defined(POWER_ESTIMATE) || defined(ALLSCALE_HAVE_CPUFREQ) state.cur_power = monitor_c->get_current_power(); state.max_power = monitor_c->get_max_power(); state.power = state.cur_power / state.max_power; 
From ce0052f693f63fad8a01908b5ffb17c811d8f129 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Fri, 16 Nov 2018 13:00:40 +0000 Subject: [PATCH 11/37] Cleanup and draft for Local-Optimizer integration with dashboard --- allscale/components/localoptimizer.hpp | 96 +++--------- allscale/components/nmsimplex_bbincr.hpp | 48 +++--- allscale/components/scheduler.hpp | 28 ++-- src/components/localoptimizer.cpp | 159 +++++++------------ src/components/monitor_component.cpp | 6 +- src/components/nmsimplex_bbincr.cpp | 108 +++++++------ src/components/scheduler_component.cpp | 188 +++++++---------------- src/optimizer.cpp | 15 +- src/scheduler.cpp | 36 +++++ 9 files changed, 286 insertions(+), 398 deletions(-) diff --git a/allscale/components/localoptimizer.hpp b/allscale/components/localoptimizer.hpp index e7a77eb..d708d1d 100644 --- a/allscale/components/localoptimizer.hpp +++ b/allscale/components/localoptimizer.hpp @@ -31,12 +31,6 @@ enum objectiveType resource }; -enum parameterType -{ - thread, - frequency -}; - enum searchPolicy { allscale, @@ -44,50 +38,12 @@ enum searchPolicy manual }; -/* structure type of a single optimization objective */ -struct objective -{ - double last_scores[3]; - - objectiveType type; - /* leeway threshold desired, 0-1 double */ - double leeway; - /* non-negative integer priority of the objective, 0 is highest priority*/ - int priority; - /* local minimum during single objective optimization */ - double localmin; - /* local maximum during single objective optimization */ - double localmax; - /* local minimum during single objective optimization */ - double globalmin; - /* local minimum during single objective optimization */ - double globalmax; - /* current deviation of the objective value from observed min */ - double currentthreshold; - /* sampled objective values throughout execution */ - std::vector samples; - /* thread number that lead to the objective value in samples vector */ - std::vector threads_samples; - /* frequency 
index that lead to the objective value in samples vector */ - std::vector freq_samples; - /* true if optimization of objective has converged, false otherwise */ - bool converged; - /* true if optimizer for objective has been initialized, false otherwise */ - bool initialized; - /* index to the parameter vectors for setup that has so far achieved - the minimum over all samples */ - long int min_params_idx; - double converged_minimum; - double minimization_params[2]; -}; /* structure type modelling an optimization actuation action to be taken by the scheduler */ struct actuation { - /* number of threads to resume (>0) or suspend (<0). If set to zero, - number of threads will stay unchanged. */ - unsigned int delta_threads; + unsigned int threads; #if defined(ALLSCALE_HAVE_CPUFREQ) /* index to the global cpu-supported frequencies vector pointing to @@ -109,16 +65,16 @@ struct localoptimizer #if defined(ALLSCALE_HAVE_CPUFREQ) frequency_param_(0), #endif - current_objective_idx_(0), converged_(false), convergence_threshold_(0.01), + time_weight(0.0), + energy_weight(0.0), + resource_weight(0.0), nmd(0.01) { if (optmethod_ == random) srand(std::time(NULL)); } - localoptimizer(std::list); - bool isConverged(); void setPolicy(searchPolicy pol) @@ -132,13 +88,26 @@ struct localoptimizer #endif } #ifdef ALLSCALE_HAVE_CPUFREQ - void initialize_nmd(); + void initialize_nmd(bool from_scratch); #endif - double opt_weights[NMD_NUM_OBJECTIVES]; - searchPolicy getPolicy() { return optmethod_; } - void setobjectives(std::list); + // VV: Modifying the objectives triggers restarting the optimizer + void setobjectives(double time_weight, + double energy_weight, + double resource_weight); + + void getobjectives(double *time_weight, + double *energy_weight, + double *resource_weight) + { + if ( time_weight != nullptr ) + *time_weight = this->time_weight; + if ( energy_weight != nullptr ) + *energy_weight = this->energy_weight; + if ( resource_weight != nullptr ) + *resource_weight = 
this->resource_weight; + } std::size_t getCurrentThreads() { return threads_param_; } @@ -221,10 +190,11 @@ struct localoptimizer } private: + double time_weight, energy_weight, resource_weight; + // VV: Used to convert thread_idx to actual number of threads std::size_t threads_dt; - void accumulate_objective_measurements(); void reset_accumulated_measurements(); @@ -240,15 +210,8 @@ struct localoptimizer bool mo_initialized; - /* vector of active optimization objectives. Objectives are stored - in the vector in decreasing priority order */ - std::vector objectives_; - NelderMead nmd; - /* counts number of parameter changes (as pair) */ - unsigned long long int param_changes_; - /* single objective optimization method used */ searchPolicy optmethod_ = random; @@ -283,22 +246,9 @@ struct localoptimizer /***** optimization state variables ******/ - /* index to the _objectives vector of currently optimized objective */ - unsigned short int current_objective_idx_; - - /* number of times the optimizer step() has been invoked, this is for - init and housekeeping purposes */ - unsigned long long int steps_; - - /* currently optimized parameter */ - parameterType current_param_; - /* initial warm-up steps */ const unsigned int warmup_steps_ = 3; - /* maximum number of optimization steps allowed */ - const int max_steps_ = 100; - /* set to true if local optimizer has converged over all objectives */ bool converged_; }; diff --git a/allscale/components/nmsimplex_bbincr.hpp b/allscale/components/nmsimplex_bbincr.hpp index 58844d9..157cb0b 100644 --- a/allscale/components/nmsimplex_bbincr.hpp +++ b/allscale/components/nmsimplex_bbincr.hpp @@ -94,10 +94,16 @@ class NelderMead NelderMead(double); // VV: For the time being // weights = [ W_time, W_energy/power, W_resources ] + // initial_simplex = double[NMD_NUM_KNOBS+1][NMD_NUM_KNOBS] // constraint_min = [min_threads, min_freq_idx] - void initialize_simplex(double weights[NMD_NUM_OBJECTIVES], - double 
constraint_min[NMD_NUM_KNOBS], - double constraint_max[NMD_NUM_KNOBS]); + void initialize_simplex(const double weights[NMD_NUM_OBJECTIVES], + const double initial_simplex[][NMD_NUM_KNOBS], + const double constraint_min[NMD_NUM_KNOBS], + const double constraint_max[NMD_NUM_KNOBS]); + + void initialize_simplex(const double weights[NMD_NUM_OBJECTIVES], + const double constraint_min[NMD_NUM_KNOBS], + const double constraint_max[NMD_NUM_KNOBS]); void print_initial_simplex(); void print_iteration(); @@ -112,9 +118,16 @@ class NelderMead return min; } + // VV: Returns a [NMD_NUM_KNOS+1][NMD_NUM_KNOBS] array + void get_simplex(double simplex[][NMD_NUM_KNOBS]) { + for (auto i=0; i> freq_times; - std::vector>> objectives_status; + unsigned int freq_step; bool target_freq_found; #endif - unsigned int resource_step; bool target_resource_found; mutable mutex_type throttle_mtx_; @@ -189,9 +193,9 @@ namespace allscale { namespace components { bool resource_requested; bool energy_requested; - double time_leeway; - double resource_leeway; - double energy_leeway; + double time_weight; + double resource_weight; + double energy_weight; unsigned int period_for_time; unsigned int period_for_resource; unsigned int period_for_power; diff --git a/src/components/localoptimizer.cpp b/src/components/localoptimizer.cpp index 19a05e8..5dfd932 100644 --- a/src/components/localoptimizer.cpp +++ b/src/components/localoptimizer.cpp @@ -24,111 +24,48 @@ namespace allscale { namespace components { - #if 0 -localoptimizer::localoptimizer(std::list targetobjectives) - : objectives_((int)targetobjectives.size()), - nmd(convergence_threshold_), - param_changes_(0), - steps_(0), - current_param_(thread), - converged_(false) +void localoptimizer::setobjectives(double time_weight, + double energy_weight, + double resource_weight) { - for (objective o : targetobjectives) - { - //std::cout << o.type << "," << o.leeway << "," << o.priority << '\n'; - objectives_[o.priority] = o; - 
objectives_[o.priority].localmin = 10000; - objectives_[o.priority].globalmin = 10000; - objectives_[o.priority].localmax = 0.0; - objectives_[o.priority].globalmax = 0.0; - objectives_[o.priority].converged = false; - objectives_[o.priority].initialized = false; - objectives_[o.priority].min_params_idx = 0; - objectives_[o.priority].converged_minimum = 0; - } -#ifdef ALLSCALE_HAVE_CPUFREQ - setCurrentFrequencyIdx(0); -#endif -}; -#endif + this->time_weight = time_weight; + this->energy_weight = energy_weight; + this->resource_weight = resource_weight; -void localoptimizer::setobjectives(std::list targetobjectives) -{ - objectives_.clear(); - objectives_.resize((int)targetobjectives.size()); - - explore_knob_domain = true; - - for (objective o : targetobjectives) - { - //std::cout << o.type << "," << o.leeway << "," << o.priority << '\n'; - objectives_[o.priority] = o; - objectives_[o.priority].localmin = 10000; - objectives_[o.priority].globalmin = 10000; - objectives_[o.priority].localmax = 0.0; - objectives_[o.priority].globalmax = 0.0; - objectives_[o.priority].converged = false; - objectives_[o.priority].initialized = false; - objectives_[o.priority].min_params_idx = 0; - objectives_[o.priority].converged_minimum = 0; - - opt_weights[o.type] = o.leeway; - } - steps_ = 0; - param_changes_ = 0; - current_param_ = thread; #ifdef ALLSCALE_HAVE_CPUFREQ setCurrentFrequencyIdx(0); #endif - converged_ = false; + + // VV: Modifying the objectives triggers restarting the optimizer + // from scratch + initialize_nmd(true); } void localoptimizer::reset(int threads, int freq_idx) { threads_param_ = threads; - param_changes_ = 0; thread_param_values_.clear(); #ifdef ALLSCALE_HAVE_CPUFREQ frequency_param_ = freq_idx; frequency_param_values_.clear(); #endif - current_objective_idx_ = 0; - steps_ = 0; - current_param_ = thread; converged_ = false; }; #ifdef DEBUG_ void localoptimizer::printobjectives() { - for (auto &el : objectives_) - { - std::cout << "Objective" - << "\t\t" 
- << "Priority" - << "\t\t" - << "Leeway" << std::endl; - switch (el.type) - { - case time: - std::cout << "Time" - << "\t\t" << el.priority << "\t\t" << el.leeway << std::endl; - break; - case energy: - std::cout << "Energy" - << "\t\t" << el.priority << "\t\t" << el.leeway << std::endl; - break; - case resource: - std::cout << "Resource" - << "\t\t" << el.priority << "\t\t" << el.leeway << std::endl; - break; - } - } + std::cout << "[LocalOptimizer|DEBUG] Weights=[time:" << time_weight + << ", energy:" << energy_weight + << ", resource:" << resource_weight << "]" << std::endl; } +#endif bool localoptimizer::isConverged() { #if 0 + // VV: This is an attempt to make optimization choices for + // tasks of smaller granularity (after splitting a task) if ( converged_ == false ) { return false; } @@ -166,8 +103,6 @@ void localoptimizer::printverbosesteps(actuation act) #endif } -#endif - void localoptimizer::accumulate_objective_measurements() { if (pending_num_times) @@ -205,16 +140,30 @@ void localoptimizer::setmaxthreads(std::size_t threads) } #ifdef ALLSCALE_HAVE_CPUFREQ -void localoptimizer::initialize_nmd() +void localoptimizer::initialize_nmd(bool from_scratch) { - // VV: Place reasonable limits to #threads and cpu_freq tunable knobs + // VV: Place constraints to #threads and cpu_freq tunable knobs double constraint_min[] = {1, 0}; double constraint_max[] = {ceil(max_threads_/(double)threads_dt), (double)frequencies_param_allowed_.size() - 1}; + const double opt_weights[] = { time_weight, energy_weight, resource_weight }; - nmd.initialize_simplex(opt_weights, - constraint_min, constraint_max); + if( from_scratch == false ){ + double prev_simplex[NMD_NUM_KNOBS+1][NMD_NUM_KNOBS]; + + nmd.get_simplex(prev_simplex); + + nmd.initialize_simplex(opt_weights, + prev_simplex, + constraint_min, + constraint_max); + } else { + nmd.initialize_simplex(opt_weights, + nullptr, + constraint_min, + constraint_max); + } mo_initialized = true; explore_knob_domain = true; @@ 
-229,13 +178,10 @@ void localoptimizer::measureObjective(double iter_time, double power, double thr << power << " " << threads << std::endl; - if (steps_) - { - pending_time += iter_time; - pending_energy += power; - pending_threads += threads; - pending_num_times++; - } + pending_time += iter_time; + pending_energy += power; + pending_threads += threads; + pending_num_times++; } void localoptimizer::reset_accumulated_measurements() @@ -248,28 +194,25 @@ void localoptimizer::reset_accumulated_measurements() actuation localoptimizer::step() { - - steps_++; actuation act; - act.delta_threads = threads_param_; + act.threads = threads_param_; #ifdef ALLSCALE_HAVE_CPUFREQ act.frequency_idx = frequency_param_; #endif /* random optimization step */ if (optmethod_ == random) { - act.delta_threads = (rand() % max_threads_); + act.threads = (rand() % max_threads_); #ifdef ALLSCALE_HAVE_CPUFREQ act.frequency_idx = rand() % frequencies_param_allowed_.size(); - // if (act.frequency_idx == frequency_param_) - // act.frequency_idx = -1; #endif } #ifdef ALLSCALE_HAVE_CPUFREQ else if (optmethod_ == allscale) { + // VV: Keep track of dirty objectives if (mo_initialized == false) - initialize_nmd(); + initialize_nmd(true); accumulate_objective_measurements(); const double latest_measurements[] = {pending_time, @@ -297,34 +240,36 @@ actuation localoptimizer::step() std::cout << "[LOCALOPTIMIZER|INFO] Minimal Objective Value = " << min_score << " Threads = " << minimization_point[0] << " Freq_idx = " << minimization_point[1] << std::endl; std::cout << "******************************************" << std::endl; #endif - act.delta_threads = minimization_point[0]; + act.threads = minimization_point[0]; act.frequency_idx = minimization_point[1]; // VV: Stop searching for new knob_set explore_knob_domain = false; converged_ = true; } else { // VV: Have not converged yet, keep exploring - act.delta_threads = nmd_res.threads; + act.threads = nmd_res.threads; act.frequency_idx = 
nmd_res.freq_idx; } - act.delta_threads *= threads_dt; + act.threads *= threads_dt; +#ifdef DEBUG_MULTIOBJECTIVE_ std::cout << "[LOCALOPTIMIZER|DEBUG] ACTUAL Vertex to try:"; - std::cout << " Threads = " << act.delta_threads; + std::cout << " Threads = " << act.threads; std::cout << " Freq Idx = " << act.frequency_idx << std::endl; +#endif } } #endif // ALLSCALE_HAVE_CPUFREQ validate_act: - if (act.delta_threads > max_threads_) + if (act.threads > max_threads_) { - act.delta_threads = max_threads_; + act.threads = max_threads_; } - else if (act.delta_threads < 1) + else if (act.threads < 1) { - act.delta_threads = getCurrentThreads(); + act.threads = getCurrentThreads(); } #ifdef ALLSCALE_HAVE_CPUFREQ // VV: If freq_idx is -1 then set it to last used frequency (frequency_param_) diff --git a/src/components/monitor_component.cpp b/src/components/monitor_component.cpp index fde7877..d1817ae 100644 --- a/src/components/monitor_component.cpp +++ b/src/components/monitor_component.cpp @@ -397,12 +397,12 @@ namespace allscale { namespace components { float monitor::get_max_power() { #if defined(ALLSCALE_HAVE_CPUFREQ) - // VV: report 1250 Watts + // VV: report 1100 Watts // ( redbox paper 5283 for 8335-GTA indicates 1875 for the - // whole node but I've noticed up to ~1100-1200 Watts, for + // whole node but I've noticed up to ~1100 Watts, for // the time being this is a good enough figure ) // ( this should be dynamically configured/discovered ) - return 1250.0; + return 1100.0; #elif defined(POWER_ESTIMATE) return allscale::power::estimate_power(get_max_freq(0)) * num_cpus_; #else diff --git a/src/components/nmsimplex_bbincr.cpp b/src/components/nmsimplex_bbincr.cpp index f206cc3..864f3fd 100644 --- a/src/components/nmsimplex_bbincr.cpp +++ b/src/components/nmsimplex_bbincr.cpp @@ -36,25 +36,9 @@ NelderMead::NelderMead(double eps) #endif itr = 0; state_ = warmup; - - /* dynamically allocate arrays */ - - /* allocate the rows of the arrays */ - v = (double **)malloc((n + 
1) * sizeof(double *)); - f = (double *)malloc((n + 1) * sizeof(double)); - vr = (double *)malloc(n * sizeof(double)); - ve = (double *)malloc(n * sizeof(double)); - vc = (double *)malloc(n * sizeof(double)); - vm = (double *)malloc(n * sizeof(double)); warming_up_step = 0; convergence_reevaluating = false; - - /* allocate the columns of the arrays */ - for (i = 0; i <= n; i++) - { - v[i] = (double *)malloc(n * sizeof(double)); - } } std::pair NelderMead::explore_next_extra(double *extra, int level, @@ -196,17 +180,6 @@ void NelderMead::generate_new(F &gen) void NelderMead::my_constraints(double x[]) { - // round to integer and bring again with allowable margins - // todo fix: generalize - - // if (x[0] < constraint_min[0] || x[0] > constraint_max[0]){ - // x[0] = (constraint_min[0] + constraint_max[0])/2; - // } - - // if (x[1] < constraint_min[1] || x[1] > constraint_max[1]){ - // x[1] = (constraint_min[1] + constraint_max[1])/2; - // } - for (auto i = 0u; i < 2u; ++i) { if (x[i] < constraint_min[i]) @@ -268,7 +241,7 @@ double NelderMead::evaluate_score(const double objectives[], const double *weigh { double score; // VV: [time, energy/power, resources] - double scale[] = {1.0, 1000.0, 1.0}; + double scale[] = {1.0, 1100., 1.0}; scale[2] = (double)constraint_max[0]; if (weights == nullptr) @@ -282,7 +255,7 @@ double NelderMead::evaluate_score(const double objectives[], const double *weigh score += t * t * weights[i]; } #else - score = 0.0; + score = 1.0; for ( auto i=0; i(std::chrono::system_clock::now()).time_since_epoch().count(); @@ -323,21 +295,57 @@ void NelderMead::initialize_simplex(double weights[3], warming_up_step = 0; convergence_reevaluating = false; cache_.clear(); +} + +/* FIXME: generalize */ +void NelderMead::initialize_simplex(const double weights[3], + const double initial_simplex[][NMD_NUM_KNOBS], + const double constraint_min[2], + const double constraint_max[2]) +{ + int i, j; + long timestamp_now = 
std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + + for (i = 0; i < NMD_NUM_KNOBS; i++) + { + this->constraint_min[i] = constraint_min[i]; + this->constraint_max[i] = constraint_max[i]; + } - int threads_low = round(0.25 * (constraint_max[0] - constraint_min[1]) - + constraint_min[1]); - int threads_med = round(0.5 * (constraint_max[0] - constraint_min[1]) - + constraint_min[1]); - int threads_high = constraint_max[0] * 0.75; + set_weights(weights); + state_ = warmup; + itr = 0; + warming_up_step = 0; + convergence_reevaluating = false; + cache_.clear(); + if (initial_simplex == nullptr) + { + int threads_low = round(0.25 * (constraint_max[0] - constraint_min[1]) + + constraint_min[1]); + int threads_med = round(0.5 * (constraint_max[0] - constraint_min[1]) + + constraint_min[1]); + int threads_high = constraint_max[0] * 0.75; - initial_configurations[0][0] = threads_low; - initial_configurations[0][1] = (int)constraint_min[1]; + initial_configurations[0][0] = threads_low; + initial_configurations[0][1] = (int)constraint_min[1]; - initial_configurations[1][0] = threads_med; - initial_configurations[1][1] = (int)constraint_max[1]; + initial_configurations[1][0] = threads_med; + initial_configurations[1][1] = (int)constraint_max[1]; - initial_configurations[2][0] = threads_high; - initial_configurations[2][1] = (int)constraint_max[1]; + initial_configurations[2][0] = threads_high; + initial_configurations[2][1] = (int)constraint_max[1]; + } else { + double knob_set[NMD_NUM_KNOBS]; + for (i=0; i 0 ) { + if ( warming_up_step > 0 && warming_up_step < NMD_NUM_KNOBS + 1) { if ( (int) v[warming_up_step-1][0] != profiled_threads ) { std::cout << "[NelderMead|WARN] Meant to profile " << v[warming_up_step-1] << " threads " @@ -858,12 +866,14 @@ optstepresult NelderMead::step(const double objectives[]) f[warming_up_step-1] = evaluate_score(objectives, nullptr); cache_update(profiled_threads, v[warming_up_step-1][1], objectives, true); 
- } + } if ( warming_up_step == NMD_NUM_KNOBS + 1) { // VV: We need not explore the knob_set space anymore state_ = start; return step(objectives); + } else if (warming_up_step > NMD_NUM_KNOBS + 1) { + std::cout << "[NelderMead|Warn] Unknown warmup step " << warming_up_step << std::endl; } optstepresult res; @@ -1010,7 +1020,9 @@ bool NelderMead::testConvergence(std::size_t tested_combinations) } } - print_initial_simplex(); + OUT_DEBUG ( + print_initial_simplex(); + ) return false; } else { diff --git a/src/components/scheduler_component.cpp b/src/components/scheduler_component.cpp index 9b522d8..6875533 100644 --- a/src/components/scheduler_component.cpp +++ b/src/components/scheduler_component.cpp @@ -57,7 +57,6 @@ scheduler::scheduler(std::uint64_t rank) target_freq_found(false) #endif , - resource_step(1), target_resource_found(false), sampling_interval(10), current_avg_iter_time(0.0), @@ -65,9 +64,9 @@ scheduler::scheduler(std::uint64_t rank) time_requested(false), resource_requested(false), energy_requested(false), - time_leeway(1.0), - resource_leeway(1.0), - energy_leeway(1.0), + time_weight(0.0), + resource_weight(0.0), + energy_weight(0.0), period_for_time(10), period_for_resource(10), period_for_power(20), @@ -193,14 +192,11 @@ std::size_t scheduler::get_num_numa_cores(std::size_t domain) { * */ void scheduler::init() { - - std::vector objectives_priorities; - int objectives_priority_idx=0; - std::size_t num_localities = allscale::get_num_localities(); std::unique_lock l(resize_mtx_); hpx::util::ignore_while_checking> il(&l); + if (initialized_) return; @@ -281,95 +277,53 @@ void scheduler::init() { #ifdef DEBUG_INIT_ std::cout << "Scheduling Objective provided: " << obj << "\n"; #endif - // Don't scale objectives if none is given - double leeway = 1.0; + // VV: Don't scale objectives if none is given + double opt_weight = 1.0; if (idx != std::string::npos) { #ifdef DEBUG_INIT_ - std::cout << "Found a leeway, triggering multi-objectives policies\n" - << 
std::flush; + std::cout << "Found an optimization weight, triggering " + "multi-objectives policies\n" << std::flush; #endif multi_objectives = true; obj = objective_str.substr(0, idx); - leeway = std::stod(objective_str.substr(idx + 1)); + opt_weight = std::stod(objective_str.substr(idx + 1)); } if (obj == "time") { time_requested = true; - objectives_priorities.push_back(time); -#ifdef DEBUG_INIT_ - std::cout << "Priority[" << objectives_priority_idx << "]=" << objectives_priorities[objectives_priority_idx] - << std::endl; -#endif - time_leeway = leeway; + time_weight = opt_weight; #ifdef DEBUG_INIT_ - std::cout << "Set time margin to " << time_leeway << "\n" << std::flush; + std::cout << "Set time weight to " << time_weight << "\n" << std::flush; #endif - } else if (obj == "resource") { - resource_requested = true; - objectives_priorities.push_back(resource); + resource_requested = true; + resource_weight = opt_weight; #ifdef DEBUG_INIT_ - std::cout << "Priority[" << objectives_priority_idx << "]=" << objectives_priorities[objectives_priority_idx] - << std::endl; -#endif - resource_leeway = leeway; -#ifdef DEBUG_INIT_ - std::cout << "Set resource margin to " << resource_leeway << "\n" + std::cout << "Set resource weight to " << resource_weight << "\n" << std::flush; - ; #endif } else if (obj == "energy") { - energy_requested = true; - objectives_priorities.push_back(energy); -#ifdef DEBUG_INIT_ - std::cout << "Priority[" << objectives_priority_idx << "]=" << objectives_priorities[objectives_priority_idx] - << std::endl; -#endif - energy_leeway = leeway; + energy_requested = true; + energy_weight = opt_weight; #ifdef DEBUG_INIT_ - std::cout << "Set energy margin to " << energy_leeway << "\n" + std::cout << "Set energy weight to " << energy_weight << "\n" << std::flush; - ; #endif } else { - std::ostringstream all_keys; - copy(scheduler::objectives.begin(), scheduler::objectives.end(), - std::ostream_iterator(all_keys, ",")); - std::string keys_str = 
all_keys.str(); - keys_str.pop_back(); HPX_THROW_EXCEPTION( hpx::bad_request, "scheduler::init", boost::str( - boost::format("Wrong objective: %s, Valid values: [%s]") % obj % - keys_str)); + boost::format("Wrong objective: Valid values: [time, energy, resource]"))); } - if (time_leeway > 1 || resource_leeway > 1 || energy_leeway > 1) { + if (time_weight > 2 || resource_weight > 2 || energy_weight > 2 + || time_weight < -2 || resource_weight < -2 || energy_weight < -2) { HPX_THROW_EXCEPTION(hpx::bad_request, "scheduler::init", - "leeways should be within ]0, 1]"); + "Objective weights should be within [-2, 2]"); } - objectives_priority_idx++; - } - } - objectives_priority_idx--; - - /* Reading optional user provided input for granularity (step) of - adding/removing resources to/from the runtime (where resource=OS thread) */ - std::string input_resource_step_str = - hpx::get_config_entry("allscale.resource_step", ""); - if (!input_resource_step_str.empty()) { - - resource_step = std::stoul(input_resource_step_str); -#ifdef DEBUG_INIT_ - std::cout << "Resource step provided : " << resource_step << "\n"; -#endif - if (resource_step == 0 || resource_step >= os_thread_count) { - HPX_THROW_EXCEPTION( - hpx::bad_request, "scheduler::init", - "resource step should be within ]0, total nb threads["); } } @@ -400,16 +354,13 @@ void scheduler::init() { #if defined(ALLSCALE_HAVE_CPUFREQ) if (multi_objectives) { - // reallocating objectives_status vector of vectors - objectives_status.resize(3); - for (int i = 0; i < 3; i++) { - objectives_status[i].resize(3); - } + #ifdef DEBUG_INIT_ std::cout << "\n****************************************************\n" << std::flush; - std::cout << "Policy selected: multi-objective set with time=" << time_leeway - << ", resource=" << resource_leeway - << ", energy=" << energy_leeway << "\n" + std::cout << "Policy selected: multi-objective set with time=" << time_weight + << ", energy=" << energy_weight + << ", resource=" << resource_weight + 
<< "\n" << std::flush; std::cout << "Objectives Flags Set: \n" << "\tTime: " << time_requested << @@ -447,53 +398,9 @@ void scheduler::init() { last_optimization_timestamp_ = t_duration_now; last_objective_measurement_timestamp_= t_duration_now; - std::list objectives_temp; - if (energy_requested){ - objective o_temp; - o_temp.type=energy; - o_temp.leeway=energy_leeway; - int i=0; - for(auto& el: objectives_priorities){ - if (el==energy){ - o_temp.priority=i; - break; - } - ++i; - } - objectives_temp.push_back(o_temp); - } - if (time_requested){ - objective o_temp; - o_temp.type=time; - o_temp.leeway=time_leeway; - int i=0; - for(auto& el: objectives_priorities){ - if (el==time){ - o_temp.priority=i; - break; - } - ++i; - } - objectives_temp.push_back(o_temp); - } - if (resource_requested){ - objective o_temp; - o_temp.type=resource; - o_temp.leeway=resource_leeway; - int i=0; - for(auto& el: objectives_priorities){ - if (el==resource){ - o_temp.priority=i; - break; - } - ++i; - } - objectives_temp.push_back(o_temp); - } - lopt_.setobjectives(objectives_temp); lopt_.setmaxthreads(os_thread_count); - lopt_.reset(os_thread_count,0); - #if defined(ALLSCALE_HAVE_CPUFREQ) + + #if defined(ALLSCALE_HAVE_CPUFREQ) using hardware_reconf = allscale::components::util::hardware_reconf; auto freqs = hardware_reconf::get_frequencies(0); @@ -502,7 +409,16 @@ void scheduler::init() { HPX_THROW_EXCEPTION(hpx::bad_request, "scheduler::init", "error in initializing the local optimizer, allowed frequency values are empty"); } - #endif + // VV: Set to max number of threads and max frequency + lopt_.reset(os_thread_count, freqs.size()-1); +#else + // VV: Max number of threads, and an arbitrary frequency index + lopt_.reset(os_thread_count,0); +#endif + + // VV: Set objectives after setting all constraints to + // trigger the initialization of nmd + lopt_.setobjectives(time_weight, energy_weight, resource_weight); #ifdef DEBUG_ lopt_.printobjectives(); #endif @@ -819,32 +735,46 @@ void 
scheduler::optimize_locally(work_item const& work) #endif // amend threads if signaled - if (act_temp.delta_threads < active_threads){ + if (act_temp.threads < active_threads){ #ifdef DEBUG_MULTIOBJECTIVE_ std::cout << "[SCHEDULER|INFO]: Active Threads = " << active_threads << " out of " << lopt_.getmaxthreads() - << " , target threads = " << act_temp.delta_threads << std::endl; + << " , target threads = " << act_temp.threads << std::endl; #endif //unsigned int suspended_temp = suspend_threads(new_threads_target); //lopt_.setCurrentThreads(lopt_.getCurrentThreads()-suspended_temp); - suspend_threads(active_threads-act_temp.delta_threads); + suspend_threads(active_threads-act_temp.threads); } - else if (act_temp.delta_threads > active_threads){ + else if (act_temp.threads > active_threads){ #ifdef DEBUG_MULTIOBJECTIVE_ std::cout << "[SCHEDULER|INFO]: Active Threads = " << active_threads << " out of " << lopt_.getmaxthreads() - << " , target threads = " << act_temp.delta_threads << std::endl; + << " , target threads = " << act_temp.threads << std::endl; #endif - resume_threads(act_temp.delta_threads - active_threads); + resume_threads(act_temp.threads - active_threads); } fix_allcores_frequencies(act_temp.frequency_idx); lopt_.setCurrentFrequencyIdx(act_temp.frequency_idx); - lopt_.setCurrentThreads(act_temp.delta_threads); + lopt_.setCurrentThreads(act_temp.threads); } } // uselopt #endif } } +void scheduler::set_local_optimizer_weights(double time_weight, + double energy_weight, + double resource_weight) +{ + lopt_.setobjectives(time_weight, energy_weight, resource_weight); +} + +void scheduler::get_local_optimizer_weights(double *time_weight, + double *energy_weight, + double *resource_weight) +{ + lopt_.getobjectives(time_weight, energy_weight, resource_weight); +} + std::pair> scheduler::schedule_local(work_item work, std::unique_ptr&& reqs, runtime::HierarchyAddress const& addr) diff --git a/src/optimizer.cpp b/src/optimizer.cpp index fc90f8b..e919fb6 100644 
--- a/src/optimizer.cpp +++ b/src/optimizer.cpp @@ -126,6 +126,19 @@ tuning_objective get_default_objective() return tuning_objective::efficiency(); if (obj == "power") return tuning_objective::power(); + if ( obj == "local") { + double time_weight, energy_weight, resource_weight; + + auto &&local_scheduler = scheduler::get(); + + local_scheduler.get_local_optimizer_weights(&time_weight, + &energy_weight, + &resource_weight); + // VV: If the local-optimizer is used too then copy its objectives + return tuning_objective(time_weight, + resource_weight, + energy_weight); + } float speed = 0.0f; float efficiency = 0.0f; @@ -233,7 +246,7 @@ void global_optimizer::tune(std::vector const &state) total_efficiency += state[i].load_ * (float(state[i].active_frequency_ * state[i].cores_per_node_) / float(max_frequency * state[i].cores_per_node_));; used_power += state[i].energy_; } -#ifdef POWER_ESTIMATE +#if defined(POWER_ESTIMATE) || defined(ALLSCALE_HAVE_CPUFREQ) max_power += monitor_c->get_max_power(); #endif } diff --git a/src/scheduler.cpp b/src/scheduler.cpp index 485abc9..d88d568 100644 --- a/src/scheduler.cpp +++ b/src/scheduler.cpp @@ -370,18 +370,54 @@ namespace allscale { std::lock_guard l(optimizer_.mtx_); optimizer_.objective_.speed_exponent = exp; + double time_weight, energy_weight, resource_weight; + + auto &&local_scheduler = scheduler::get(); + + local_scheduler.get_local_optimizer_weights(&time_weight, + &energy_weight, + &resource_weight); + time_weight = (double) exp; + + local_scheduler.set_local_optimizer_weights(time_weight, + energy_weight, + resource_weight); } void set_efficiency_exponent(float exp) { std::lock_guard l(optimizer_.mtx_); optimizer_.objective_.efficiency_exponent = exp; + double time_weight, energy_weight, resource_weight; + + auto &&local_scheduler = scheduler::get(); + + local_scheduler.get_local_optimizer_weights(&time_weight, + &energy_weight, + &resource_weight); + resource_weight = (double) exp; + + 
local_scheduler.set_local_optimizer_weights(time_weight, + energy_weight, + resource_weight); } void set_power_exponent(float exp) { std::lock_guard l(optimizer_.mtx_); optimizer_.objective_.power_exponent = exp; + double time_weight, energy_weight, resource_weight; + + auto &&local_scheduler = scheduler::get(); + + local_scheduler.get_local_optimizer_weights(&time_weight, + &energy_weight, + &resource_weight); + energy_weight = (double) exp; + + local_scheduler.set_local_optimizer_weights(time_weight, + energy_weight, + resource_weight); } hpx::util::tuple get_optimizer_exponents() From d1ea9d64b7f2c817496a698cbdde44c519f3377a Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Fri, 16 Nov 2018 14:48:30 +0000 Subject: [PATCH 12/37] Report avg_iteration_time as "speed" and number of threads as "efficiency" - This is only the case when CPUFreq is used - I've also modified the dashboard --- src/components/nmsimplex_bbincr.cpp | 11 +++++++++-- src/dashboard.cpp | 5 +++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/components/nmsimplex_bbincr.cpp b/src/components/nmsimplex_bbincr.cpp index 864f3fd..94cea8a 100644 --- a/src/components/nmsimplex_bbincr.cpp +++ b/src/components/nmsimplex_bbincr.cpp @@ -139,6 +139,7 @@ void NelderMead::generate_new(F &gen) max_level *= 2; max_nested_level *=2; + auto timestamp_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); int is_same; do @@ -147,7 +148,13 @@ void NelderMead::generate_new(F &gen) auto key = std::make_pair((int)new_set[0], (int)new_set[1]); auto entry = cache_.find(key); - is_same = (entry != cache_.end()); + + is_same = 0; + + if ( entry != cache_.end() ) { + auto dt = timestamp_now - entry->second._cache_timestamp; + is_same = dt <= entry->second._cache_expires_dt; + } if ( ( level < max_level +1) && is_same @@ -281,7 +288,7 @@ void NelderMead::initialize_simplex(const double weights[3], const double constraint_max[2]) { int i, j; - long 
timestamp_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + auto timestamp_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); for (i = 0; i < NMD_NUM_KNOBS; i++) { diff --git a/src/dashboard.cpp b/src/dashboard.cpp index fc8f9f8..99bd6fb 100644 --- a/src/dashboard.cpp +++ b/src/dashboard.cpp @@ -57,8 +57,13 @@ namespace allscale { namespace dashboard state.productive_cycles_per_second = float(state.cur_frequency) * (1.f - state.idle_rate); // freq to Hz +#ifdef ALLSCALE_HAVE_CPUFREQ + state.speed = monitor_c->get_avg_time_last_iterations(100); + state.efficiency = active_cores; +#else state.speed = 1.f - state.idle_rate; state.efficiency = state.speed * (float(state.cur_frequency * active_cores) / float(state.max_frequency * state.num_cores)); +#endif #if defined(POWER_ESTIMATE) || defined(ALLSCALE_HAVE_CPUFREQ) state.cur_power = monitor_c->get_current_power(); From 711479b3ec72e0d740a89393fe06878f6bbdd35a Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Fri, 16 Nov 2018 20:16:08 +0000 Subject: [PATCH 13/37] Randomize initial simplex --- allscale/components/nmsimplex_bbincr.hpp | 5 +- src/components/localoptimizer.cpp | 6 +- src/components/nmsimplex_bbincr.cpp | 70 ++++++++++++++++++++---- src/components/scheduler_component.cpp | 32 +++++------ 4 files changed, 80 insertions(+), 33 deletions(-) diff --git a/allscale/components/nmsimplex_bbincr.hpp b/allscale/components/nmsimplex_bbincr.hpp index 157cb0b..441041e 100644 --- a/allscale/components/nmsimplex_bbincr.hpp +++ b/allscale/components/nmsimplex_bbincr.hpp @@ -66,7 +66,6 @@ struct optstepresult int freq_idx; /******VV: Cache stuff******/ - double score; double objectives[3]; // (time, energy, resource) // VV: _cache_expires denotes dt (in ms) after _cache_timestamp int64_t _cache_timestamp, _cache_expires_dt; @@ -126,7 +125,7 @@ class NelderMead } unsigned long int getIterations() { return itr; } - double 
evaluate_score(const double objectives[], const double *weights) const; + double evaluate_score(const double objectives[], const double *weights); void set_weights(const double weights[]); optstepresult step(const double objectives[]); @@ -134,6 +133,8 @@ class NelderMead private: int warming_up_step; + double max_power_, max_time_; + // VV: Utility to make sure that we generate new values and not something that already // exists in the set of NMD_NUM_KNOBS+1 configuration points template diff --git a/src/components/localoptimizer.cpp b/src/components/localoptimizer.cpp index 5dfd932..8dde2fa 100644 --- a/src/components/localoptimizer.cpp +++ b/src/components/localoptimizer.cpp @@ -38,7 +38,9 @@ void localoptimizer::setobjectives(double time_weight, // VV: Modifying the objectives triggers restarting the optimizer // from scratch - initialize_nmd(true); + + mo_initialized = false; + converged_ = false; } void localoptimizer::reset(int threads, int freq_idx) @@ -252,6 +254,8 @@ actuation localoptimizer::step() } act.threads *= threads_dt; + + threads_param_ = act.threads; #ifdef DEBUG_MULTIOBJECTIVE_ std::cout << "[LOCALOPTIMIZER|DEBUG] ACTUAL Vertex to try:"; std::cout << " Threads = " << act.threads; diff --git a/src/components/nmsimplex_bbincr.cpp b/src/components/nmsimplex_bbincr.cpp index 94cea8a..e09c685 100644 --- a/src/components/nmsimplex_bbincr.cpp +++ b/src/components/nmsimplex_bbincr.cpp @@ -37,6 +37,9 @@ NelderMead::NelderMead(double eps) itr = 0; state_ = warmup; + max_power_ = 900.; + max_time_ = 3.2; + warming_up_step = 0; convergence_reevaluating = false; } @@ -244,11 +247,17 @@ bool NelderMead::cache_update(int threads, int freq_idx, return false; } -double NelderMead::evaluate_score(const double objectives[], const double *weights) const +double NelderMead::evaluate_score(const double objectives[], const double *weights) { double score; // VV: [time, energy/power, resources] - double scale[] = {1.0, 1100., 1.0}; + double scale[] = {1.0, 1.0, 1.0}; 
+ + max_time_ = max_time_ > objectives[0] ? max_time_ : objectives[0]; + max_power_ = max_power_ > objectives[2] ? max_power_ : objectives[2]; + + scale[0] = max_time_; + scale[1] = max_power_; scale[2] = (double)constraint_max[0]; if (weights == nullptr) @@ -327,6 +336,7 @@ void NelderMead::initialize_simplex(const double weights[3], cache_.clear(); if (initial_simplex == nullptr) { + #if 0 int threads_low = round(0.25 * (constraint_max[0] - constraint_min[1]) + constraint_min[1]); int threads_med = round(0.5 * (constraint_max[0] - constraint_min[1]) @@ -336,11 +346,38 @@ void NelderMead::initialize_simplex(const double weights[3], initial_configurations[0][0] = threads_low; initial_configurations[0][1] = (int)constraint_min[1]; - initial_configurations[1][0] = threads_med; - initial_configurations[1][1] = (int)constraint_max[1]; + initial_configurations[1][0] = threads_high; + initial_configurations[1][1] = (int)constraint_min[1]; initial_configurations[2][0] = threads_high; initial_configurations[2][1] = (int)constraint_max[1]; + #else + for (i=0; i double* { for (j = 0; j < NMD_NUM_KNOBS; j++) - vr[j] = vm[j] + ALPHA * (vm[j] - v[vg][j]) + extra[j]; + vr[j] = vm[j] + ALPHA * (vm[j] - v[vg][j]) - extra[j]; my_constraints(vr); @@ -567,7 +604,7 @@ optstepresult NelderMead::do_step_reflect(const double objectives[]) // VV: REFLECTED is better than BEST auto gen_new = [this](double *extra) mutable -> double* { for (j = 0; j < NMD_NUM_KNOBS; j++) - ve[j] = vm[j] + GAMMA * (vr[j] - vm[j]) + extra[j]; + ve[j] = vm[j] + GAMMA * (vr[j] - vm[j]) - extra[j]; my_constraints(ve); @@ -604,7 +641,7 @@ optstepresult NelderMead::do_step_reflect(const double objectives[]) // VV: REFLECTED between SECOND BEST and WORST auto gen_new = [this](double *extra) mutable -> double* { for (j = 0; j < NMD_NUM_KNOBS; j++) - vc[j] = vm[j] + BETA * (vr[j] - vm[j]) + extra[j]; + vc[j] = vm[j] + BETA * (vr[j] - vm[j]) - extra[j]; my_constraints(vc); @@ -641,7 +678,7 @@ optstepresult 
NelderMead::do_step_reflect(const double objectives[]) // VV: REFLECTED worse than WORST auto gen_new = [this](double *extra) mutable -> double* { for (j = 0; j < NMD_NUM_KNOBS; j++) - vc[j] = vm[j] - BETA * (vr[j] - vm[j]) + extra[j]; + vc[j] = vm[j] - BETA * (vr[j] - vm[j]) - extra[j]; my_constraints(vc); @@ -772,7 +809,7 @@ optstepresult NelderMead::do_step_contract(const double objectives[]) auto gen_new = [this, &new_vh](double *extra) mutable -> double* { for (auto j = 0; j < NMD_NUM_KNOBS; j++) - new_vh[j] = v[vs][j] + DELTA * (v[vh][j] - v[vs][j]) + extra[j]; + new_vh[j] = v[vs][j] + DELTA * (v[vh][j] - v[vs][j]) - extra[j]; my_constraints(new_vh); @@ -851,6 +888,17 @@ optstepresult NelderMead::step(const double objectives[]) std::size_t tested_combinations = cache_.size(); + evaluate_score(objectives, nullptr); + + for (i=0; isecond.objectives, nullptr); + } + } + switch (state_) { case warmup: @@ -865,7 +913,7 @@ optstepresult NelderMead::step(const double objectives[]) if ( warming_up_step > 0 && warming_up_step < NMD_NUM_KNOBS + 1) { if ( (int) v[warming_up_step-1][0] != profiled_threads ) { std::cout << "[NelderMead|WARN] Meant to profile " - << v[warming_up_step-1] << " threads " + << v[warming_up_step-1][0] << " threads " "but ended up using " << profiled_threads << std::endl; v[warming_up_step-1][0] = profiled_threads; } @@ -888,7 +936,7 @@ optstepresult NelderMead::step(const double objectives[]) res.objectives[1] = -1; res.objectives[2] = -1; res.converged = false; - res.score = -1; + res.threads = initial_configurations[warming_up_step][0]; res.freq_idx = initial_configurations[warming_up_step][1]; diff --git a/src/components/scheduler_component.cpp b/src/components/scheduler_component.cpp index 6875533..8bf9946 100644 --- a/src/components/scheduler_component.cpp +++ b/src/components/scheduler_component.cpp @@ -569,7 +569,7 @@ void scheduler::initialize_cpu_frequencies() { // Make sure frequency change happened before continuing std::cout << 
"topo.num_logical_cores: " << topo.num_logical_cores - << "topo.num_hw_threads" << topo.num_hw_threads << "\n" + << " topo.num_hw_threads" << topo.num_hw_threads << "\n" << std::flush; { // check status of Pus frequency @@ -647,7 +647,6 @@ void scheduler::optimize_locally(work_item const& work) // find out which pool has the most threads /* Count Active threads for validation*/ - hpx::threads::mask_type active_mask; std::size_t domain_active_threads = 0; std::size_t pool_idx = 0; @@ -665,15 +664,16 @@ void scheduler::optimize_locally(work_item const& work) } } std::cout << "Active OS Threads = " << total_threads_counted << std::endl; + #endif #ifdef MEASURE_ - std::size_t temp_id = work.id().id; - if ((temp_id >= period_for_power) && (temp_id % period_for_power == 0)) - { - auto timestamp_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); - auto dt = timestamp_now - last_measure_power; + auto timestamp_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + auto dt = timestamp_now - last_measure_power; + if (dt >= 5000) + { + dt = dt > 0 ? 
dt : 1 ; last_measure_power = timestamp_now; @@ -736,25 +736,19 @@ void scheduler::optimize_locally(work_item const& work) // amend threads if signaled if (act_temp.threads < active_threads){ -#ifdef DEBUG_MULTIOBJECTIVE_ - std::cout << "[SCHEDULER|INFO]: Active Threads = " << active_threads << " out of " << lopt_.getmaxthreads() - << " , target threads = " << act_temp.threads << std::endl; - -#endif - //unsigned int suspended_temp = suspend_threads(new_threads_target); - //lopt_.setCurrentThreads(lopt_.getCurrentThreads()-suspended_temp); suspend_threads(active_threads-act_temp.threads); } else if (act_temp.threads > active_threads){ -#ifdef DEBUG_MULTIOBJECTIVE_ - std::cout << "[SCHEDULER|INFO]: Active Threads = " << active_threads << " out of " << lopt_.getmaxthreads() - << " , target threads = " << act_temp.threads << std::endl; -#endif resume_threads(act_temp.threads - active_threads); } fix_allcores_frequencies(act_temp.frequency_idx); lopt_.setCurrentFrequencyIdx(act_temp.frequency_idx); - lopt_.setCurrentThreads(act_temp.threads); + lopt_.setCurrentThreads(active_threads); + +#ifdef DEBUG_MULTIOBJECTIVE_ + std::cout << "[SCHEDULER|INFO]: Active Threads = " << active_threads << " out of " << lopt_.getmaxthreads() + << " , target threads = " << act_temp.threads << ", set threads to " << active_threads << std::endl; +#endif } } // uselopt #endif From 9b30eac1c1a9a14a75abd2dda8516ae84e04779c Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Sun, 18 Nov 2018 15:47:27 +0000 Subject: [PATCH 14/37] Better logistics and modified objective score slightly --- allscale/components/scheduler.hpp | 2 +- src/components/nmsimplex_bbincr.cpp | 10 ++--- src/components/scheduler_component.cpp | 61 +++++++++++++------------- 3 files changed, 37 insertions(+), 36 deletions(-) diff --git a/allscale/components/scheduler.hpp b/allscale/components/scheduler.hpp index 9437eae..5ff8848 100644 --- a/allscale/components/scheduler.hpp +++ b/allscale/components/scheduler.hpp @@ 
-101,7 +101,7 @@ namespace allscale { namespace components { void update_active_osthreads(std::size_t threads, int64_t delta_time); void update_power_consumption(std::size_t power_sample, int64_t delta_time); #endif - int64_t last_measure_power; + int64_t last_measure_power, last_measure_threads; void fix_allcores_frequencies(int index); diff --git a/src/components/nmsimplex_bbincr.cpp b/src/components/nmsimplex_bbincr.cpp index e09c685..2b6820b 100644 --- a/src/components/nmsimplex_bbincr.cpp +++ b/src/components/nmsimplex_bbincr.cpp @@ -37,8 +37,8 @@ NelderMead::NelderMead(double eps) itr = 0; state_ = warmup; - max_power_ = 900.; - max_time_ = 3.2; + max_power_ = 1.0; + max_time_ = 30.0; warming_up_step = 0; convergence_reevaluating = false; @@ -253,8 +253,8 @@ double NelderMead::evaluate_score(const double objectives[], const double *weigh // VV: [time, energy/power, resources] double scale[] = {1.0, 1.0, 1.0}; - max_time_ = max_time_ > objectives[0] ? max_time_ : objectives[0]; - max_power_ = max_power_ > objectives[2] ? max_power_ : objectives[2]; + // max_time_ = max_time_ > objectives[0] ? max_time_ : objectives[0]; + // max_power_ = max_power_ > objectives[2] ? 
max_power_ : objectives[2]; scale[0] = max_time_; scale[1] = max_power_; @@ -910,7 +910,7 @@ optstepresult NelderMead::step(const double objectives[]) // VV: Make sure that we actually profiled what we meant to int profiled_threads = objectives[2]; - if ( warming_up_step > 0 && warming_up_step < NMD_NUM_KNOBS + 1) { + if ( warming_up_step > 0 && warming_up_step <= NMD_NUM_KNOBS + 1) { if ( (int) v[warming_up_step-1][0] != profiled_threads ) { std::cout << "[NelderMead|WARN] Meant to profile " << v[warming_up_step-1][0] << " threads " diff --git a/src/components/scheduler_component.cpp b/src/components/scheduler_component.cpp index 8bf9946..8568988 100644 --- a/src/components/scheduler_component.cpp +++ b/src/components/scheduler_component.cpp @@ -202,6 +202,7 @@ void scheduler::init() { #ifdef MEASURE_ last_measure_power = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + last_measure_threads = last_measure_power; // update_active_osthreads(0); // #ifdef ALLSCALE_HAVE_CPUFREQ // update_power_consumption(hardware_reconf::read_system_power(), 1); @@ -667,29 +668,6 @@ void scheduler::optimize_locally(work_item const& work) #endif -#ifdef MEASURE_ - auto timestamp_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); - auto dt = timestamp_now - last_measure_power; - - if (dt >= 5000) - { - - dt = dt > 0 ? 
dt : 1 ; - - last_measure_power = timestamp_now; - - update_active_osthreads(active_threads, dt); -#ifdef ALLSCALE_HAVE_CPUFREQ - allscale::components::monitor *monitor_c = &allscale::monitor::get(); - auto measurement = monitor_c->get_current_power(); - if ( measurement <= 10000 ) { - update_power_consumption(measurement, dt); - } -#endif - } - -#endif - #ifdef ALLSCALE_HAVE_CPUFREQ if (uselopt && !lopt_.isConverged()) { last_power_usage++; @@ -704,6 +682,9 @@ void scheduler::optimize_locally(work_item const& work) long elapsedTimeMs = t_duration_now - last_objective_measurement_timestamp_; + auto dt_power = t_duration_now - last_measure_power; + update_power_consumption(power_sum/last_power_usage, dt_power); + if (elapsedTimeMs > objective_measurement_period_ms){ last_objective_measurement_timestamp_= t_duration_now; @@ -718,7 +699,7 @@ void scheduler::optimize_locally(work_item const& work) current_avg_iter_time = 0.0; } - lopt_.measureObjective(current_avg_iter_time,power_sum/last_power_usage, + lopt_.measureObjective(current_avg_iter_time,power_sum/(last_power_usage*monitor_c->get_max_power()), active_threads); last_power_usage=0; power_sum=0; @@ -733,8 +714,9 @@ void scheduler::optimize_locally(work_item const& work) #ifdef DEBUG_MULTIOBJECTIVE_ lopt_.printverbosesteps(act_temp); #endif - // amend threads if signaled - + auto dt_threads = t_duration_now - last_measure_threads; + update_active_osthreads(active_threads, dt_threads); + last_measure_threads = t_duration_now; if (act_temp.threads < active_threads){ suspend_threads(active_threads-act_temp.threads); } @@ -750,7 +732,20 @@ void scheduler::optimize_locally(work_item const& work) << " , target threads = " << act_temp.threads << ", set threads to " << active_threads << std::endl; #endif } - } // uselopt + } + #ifdef MEASURE_ + else { + auto timestamp_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + auto dt = timestamp_now - last_measure_power; + if ( dt 
>= 1000 ) { + allscale::components::monitor *monitor_c = &allscale::monitor::get(); + auto cur_power = monitor_c->get_current_power(); + + update_power_consumption(cur_power, dt); + last_measure_power = timestamp_now; + } + } + #endif #endif } } @@ -1377,6 +1372,9 @@ void scheduler::update_active_osthreads(std::size_t threads, int64_t delta_time) void scheduler::update_power_consumption(std::size_t power_sample, int64_t delta_time) { + if ( power_sample > 10000) + return; + if (meas_power_max==0 || meas_power_max < power_sample) meas_power_max=power_sample; @@ -1457,16 +1455,19 @@ void scheduler::stop() { #ifdef DEBUG_MULTIOBJECTIVE_ #ifdef MEASURE_ auto timestamp_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); - auto dt = timestamp_now - last_measure_power; + auto dt_threads = timestamp_now - last_measure_threads; + auto dt_power = timestamp_now - last_measure_power; + last_measure_power = timestamp_now; + last_measure_threads = timestamp_now; - update_active_osthreads(active_threads, dt); + update_active_osthreads(active_threads, dt_threads); #ifdef ALLSCALE_HAVE_CPUFREQ allscale::components::monitor *monitor_c = &allscale::monitor::get(); auto measurement = monitor_c->get_current_power(); if ( measurement <= 10000 ) { - update_power_consumption(measurement, dt); + update_power_consumption(measurement, dt_power); } #endif From 0d80ac7fb3c3a416ef1302b39ce5ec361dbf2cd9 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Sun, 18 Nov 2018 16:28:30 +0000 Subject: [PATCH 15/37] Reporting current system score plus keeping track of thread logistics --- allscale/components/localoptimizer.hpp | 2 +- allscale/components/nmsimplex_bbincr.hpp | 2 +- allscale/components/scheduler.hpp | 5 +++++ allscale/dashboard.hpp | 2 +- src/components/localoptimizer.cpp | 9 ++++++++ src/components/scheduler_component.cpp | 28 +++++++----------------- src/dashboard.cpp | 7 +++++- 7 files changed, 31 insertions(+), 24 deletions(-) diff 
--git a/allscale/components/localoptimizer.hpp b/allscale/components/localoptimizer.hpp index d708d1d..bb6b325 100644 --- a/allscale/components/localoptimizer.hpp +++ b/allscale/components/localoptimizer.hpp @@ -76,7 +76,7 @@ struct localoptimizer srand(std::time(NULL)); } bool isConverged(); - + double evaluate_score(const double objectives[]); void setPolicy(searchPolicy pol) { optmethod_ = pol; diff --git a/allscale/components/nmsimplex_bbincr.hpp b/allscale/components/nmsimplex_bbincr.hpp index 441041e..66eed17 100644 --- a/allscale/components/nmsimplex_bbincr.hpp +++ b/allscale/components/nmsimplex_bbincr.hpp @@ -64,7 +64,7 @@ struct optstepresult double threads; /* index to frequency vector for freq parameter to set for sampling*/ int freq_idx; - + /******VV: Cache stuff******/ double objectives[3]; // (time, energy, resource) // VV: _cache_expires denotes dt (in ms) after _cache_timestamp diff --git a/allscale/components/scheduler.hpp b/allscale/components/scheduler.hpp index 5ff8848..706b5a9 100644 --- a/allscale/components/scheduler.hpp +++ b/allscale/components/scheduler.hpp @@ -72,6 +72,10 @@ namespace allscale { namespace components { void get_local_optimizer_weights(double *time_weight, double *energy_weight, double *resource_weight); + + double get_last_objective_score() { + return last_objective_score; + } private: std::size_t get_num_numa_nodes(); @@ -101,6 +105,7 @@ namespace allscale { namespace components { void update_active_osthreads(std::size_t threads, int64_t delta_time); void update_power_consumption(std::size_t power_sample, int64_t delta_time); #endif + double last_objective_score; int64_t last_measure_power, last_measure_threads; void fix_allcores_frequencies(int index); diff --git a/allscale/dashboard.hpp b/allscale/dashboard.hpp index 73670a2..385f4f1 100644 --- a/allscale/dashboard.hpp +++ b/allscale/dashboard.hpp @@ -89,7 +89,7 @@ namespace allscale { namespace dashboard // current power usage / max power usage \in [0..1] float power 
= 0; - + std::string to_json() const; template diff --git a/src/components/localoptimizer.cpp b/src/components/localoptimizer.cpp index 8dde2fa..f0a36d4 100644 --- a/src/components/localoptimizer.cpp +++ b/src/components/localoptimizer.cpp @@ -24,6 +24,15 @@ namespace allscale { namespace components { + +double localoptimizer::evaluate_score(const double objectives[]) +{ + if ( mo_initialized ) { + return nmd.evaluate_score(objectives, nullptr); + } + + return -1.0; +} void localoptimizer::setobjectives(double time_weight, double energy_weight, double resource_weight) diff --git a/src/components/scheduler_component.cpp b/src/components/scheduler_component.cpp index 8568988..5d4d9fb 100644 --- a/src/components/scheduler_component.cpp +++ b/src/components/scheduler_component.cpp @@ -86,6 +86,7 @@ scheduler::scheduler(std::uint64_t rank) #endif , nr_opt_steps(0), + last_objective_score(-1.0), uselopt(false) { allscale_monitor = &allscale::monitor::get(); @@ -203,10 +204,6 @@ void scheduler::init() { #ifdef MEASURE_ last_measure_power = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); last_measure_threads = last_measure_power; -// update_active_osthreads(0); -// #ifdef ALLSCALE_HAVE_CPUFREQ -// update_power_consumption(hardware_reconf::read_system_power(), 1); -// #endif #endif rp_ = &hpx::resource::get_partitioner(); @@ -683,6 +680,7 @@ void scheduler::optimize_locally(work_item const& work) long elapsedTimeMs = t_duration_now - last_objective_measurement_timestamp_; auto dt_power = t_duration_now - last_measure_power; + last_measure_power = t_duration_now; update_power_consumption(power_sum/last_power_usage, dt_power); if (elapsedTimeMs > objective_measurement_period_ms){ @@ -698,11 +696,14 @@ void scheduler::optimize_locally(work_item const& work) #endif current_avg_iter_time = 0.0; } - + double last_objectives[] = {current_avg_iter_time,power_sum/(last_power_usage*monitor_c->get_max_power()), + active_threads}; 
lopt_.measureObjective(current_avg_iter_time,power_sum/(last_power_usage*monitor_c->get_max_power()), active_threads); last_power_usage=0; power_sum=0; + + last_objective_score = lopt_.evaluate_score(last_objectives); } elapsedTimeMs = t_duration_now - last_optimization_timestamp_; @@ -729,7 +730,7 @@ void scheduler::optimize_locally(work_item const& work) #ifdef DEBUG_MULTIOBJECTIVE_ std::cout << "[SCHEDULER|INFO]: Active Threads = " << active_threads << " out of " << lopt_.getmaxthreads() - << " , target threads = " << act_temp.threads << ", set threads to " << active_threads << std::endl; + << " , target threads = " << act_temp.threads << std::endl; #endif } } @@ -992,10 +993,6 @@ unsigned int scheduler::suspend_threads(std::size_t suspendthreads) { std::cout << "total active PUs: " << active_threads_ << "\n"; #endif -// #ifdef MEASURE_ -// update_active_osthreads(active_threads_-active_threads); -// #endif - active_threads = active_threads_; growing = false; @@ -1057,9 +1054,6 @@ unsigned int scheduler::suspend_threads(std::size_t suspendthreads) { ) ); } -// #ifdef MEASURE_ -// update_active_osthreads(-1 * suspend_threads.size()); -// #endif active_threads = active_threads - suspend_threads.size(); @@ -1178,10 +1172,6 @@ unsigned int scheduler::resume_threads(std::size_t resumethreads) { std::cout << "total active PUs: " << active_threads_ << "\n"; #endif -// #ifdef MEASURE_ -// update_active_osthreads(active_threads_-active_threads); -// #endif - active_threads = active_threads_; // if no thread is suspended, nothing to do if (domain_blocked_threads == 0) { @@ -1237,9 +1227,6 @@ unsigned int scheduler::resume_threads(std::size_t resumethreads) { ) ); } -// #ifdef MEASURE_ -// update_active_osthreads(resume_threads.size()); -// #endif active_threads = active_threads + resume_threads.size(); #ifdef DEBUG_THREADSTATUS_ std::cout << "[SCHEDULER|INFO]: Thread Resume - Newly Active Threads: " << active_threads @@ -1368,6 +1355,7 @@ void 
scheduler::update_active_osthreads(std::size_t threads, int64_t delta_time) meas_active_threads_count += delta_time; meas_active_threads_sum += active_threads * delta_time; + std::cout <<"REGISTERING THREADS " << threads << " for " << delta_time << std::endl; } void scheduler::update_power_consumption(std::size_t power_sample, int64_t delta_time) diff --git a/src/dashboard.cpp b/src/dashboard.cpp index 99bd6fb..fcf7e8b 100644 --- a/src/dashboard.cpp +++ b/src/dashboard.cpp @@ -70,7 +70,6 @@ namespace allscale { namespace dashboard state.max_power = monitor_c->get_max_power(); state.power = state.cur_power / state.max_power; #endif - return state; } }} @@ -169,9 +168,15 @@ namespace allscale { namespace dashboard float system_state::score() const { +#ifdef ALLSCALE_HAVE_CPUFREQ + return std::exp(speed * speed_exponent) * + std::exp(efficiency * efficiency_exponent ) * + std::exp(power * power_exponent); +#else return std::pow(speed, speed_exponent) * std::pow(efficiency, efficiency_exponent) * std::pow(1 - power, power_exponent); +#endif } template void node_state::serialize(hpx::serialization::input_archive& ar, unsigned); From 5a5b30fdd6c5238e2e3c979a7ff9b3b3ad37150d Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Mon, 19 Nov 2018 13:47:47 +0000 Subject: [PATCH 16/37] Initial prototype of new ino_nmd --- allscale/components/localoptimizer.hpp | 4 +- allscale/components/nmsimplex_bbincr.hpp | 1 + allscale/components/scheduler.hpp | 4 + allscale/optimizer.hpp | 20 +- src/components/nmsimplex_bbincr.cpp | 79 ++++++ src/components/scheduler_component.cpp | 8 +- src/optimizer.cpp | 301 ++++++++++++++++++++++- src/scheduler.cpp | 20 +- 8 files changed, 417 insertions(+), 20 deletions(-) diff --git a/allscale/components/localoptimizer.hpp b/allscale/components/localoptimizer.hpp index bb6b325..722520c 100644 --- a/allscale/components/localoptimizer.hpp +++ b/allscale/components/localoptimizer.hpp @@ -66,11 +66,11 @@ struct localoptimizer frequency_param_(0), 
#endif converged_(false), - convergence_threshold_(0.01), + convergence_threshold_(0.005), time_weight(0.0), energy_weight(0.0), resource_weight(0.0), - nmd(0.01) + nmd(0.005) { if (optmethod_ == random) srand(std::time(NULL)); diff --git a/allscale/components/nmsimplex_bbincr.hpp b/allscale/components/nmsimplex_bbincr.hpp index 66eed17..8ed77dc 100644 --- a/allscale/components/nmsimplex_bbincr.hpp +++ b/allscale/components/nmsimplex_bbincr.hpp @@ -90,6 +90,7 @@ class NelderMead { public: + NelderMead(const NelderMead &other); NelderMead(double); // VV: For the time being // weights = [ W_time, W_energy/power, W_resources ] diff --git a/allscale/components/scheduler.hpp b/allscale/components/scheduler.hpp index 706b5a9..f728526 100644 --- a/allscale/components/scheduler.hpp +++ b/allscale/components/scheduler.hpp @@ -65,6 +65,10 @@ namespace allscale { namespace components { { return active_threads; } + + std::size_t get_total_threads() const { + return os_thread_count; + } void set_local_optimizer_weights(double time_weight, double energy_weight, diff --git a/allscale/optimizer.hpp b/allscale/optimizer.hpp index fc64428..7f019fa 100644 --- a/allscale/optimizer.hpp +++ b/allscale/optimizer.hpp @@ -11,6 +11,8 @@ #include #include +#include + #include #include @@ -23,6 +25,7 @@ namespace allscale { float avg_time_; unsigned long long energy_; std::uint64_t active_frequency_; + std::size_t active_cores_per_node_; std::size_t cores_per_node_; template @@ -33,6 +36,7 @@ namespace allscale { ar & avg_time_; ar & energy_; ar & active_frequency_; + ar & active_cores_per_node_; ar & cores_per_node_; } }; @@ -87,6 +91,14 @@ namespace allscale { , f_resource_max(other.f_resource_max) , f_resource_leeway(other.f_resource_leeway) , o_ino(std::move(o_ino)) + // VV: Used by balance_ino_nmd + , nmd_initialized(other.nmd_initialized) + , nmd(other.nmd) + , nodes_min(other.nodes_min) + , nodes_max(other.nodes_max) + , threads_min(other.threads_min) + , threads_max(other.threads_max) 
+ , previous_num_nodes(other.previous_num_nodes) {} bool active() const @@ -96,6 +108,7 @@ namespace allscale { hpx::future balance(bool); hpx::future balance_ino(const std::vector &old_mapping); + hpx::future balance_ino_nmd(const std::vector &old_mapping); hpx::future decide_random_mapping(const std::vector &old_mapping); bool may_rebalance(); @@ -104,7 +117,7 @@ namespace allscale { std::size_t u_steps_till_rebalance; void tune(std::vector const& state); - + int nmd_initialized; std::vector active_nodes_; std::uint64_t active_frequency_; @@ -118,9 +131,14 @@ namespace allscale { std::vector localities_; + // VV: balance_ino and balance_global data float f_resource_max, f_resource_leeway; + std::size_t previous_num_nodes; + int nodes_min, nodes_max, threads_min, threads_max; components::internode_optimizer_t o_ino; + + components::NelderMead nmd; }; } diff --git a/src/components/nmsimplex_bbincr.cpp b/src/components/nmsimplex_bbincr.cpp index 2b6820b..9f42307 100644 --- a/src/components/nmsimplex_bbincr.cpp +++ b/src/components/nmsimplex_bbincr.cpp @@ -26,6 +26,45 @@ namespace allscale namespace components { + +NelderMead::NelderMead(const NelderMead &other) +{ + EPSILON = other.EPSILON; + state_ = other.state_; + max_power_ = other.max_power_; + max_time_ = other.max_time_; + + cache_.insert(other.cache_.begin(), other.cache_.end()); + warming_up_step = other.warming_up_step; + convergence_reevaluating = other.convergence_reevaluating; + + fc = other.fc; + fe = other.fe; + vs = other.vs; + vg = other.vg; + vh = other.vh; + + for (auto i=0; i #include #include +#include #include #include @@ -38,7 +39,7 @@ namespace allscale allscale::components::monitor *monitor_c = &allscale::monitor::get(); float power_now = 100.f; #if defined(POWER_ESTIMATE) || defined(ALLSCALE_HAVE_CPUFREQ) - power_now = monitor_c->get_current_power(); + power_now = monitor_c->get_current_power() / monitor_c->get_max_power(); #endif // VV: Use power as if it were energy return { @@ -47,7 
+48,8 @@ namespace allscale my_time, power_now, float(monitor_c->get_current_freq(0)), - scheduler::get().get_active_threads() + scheduler::get().get_active_threads(), + scheduler::get().get_total_threads() }; } // optimizer_state get_optimizer_state() @@ -185,11 +187,15 @@ global_optimizer::global_optimizer() active_nodes_(allscale::get_num_localities(), true), tuner_(new simple_coordinate_descent(tuner_configuration{active_nodes_, allscale::monitor::get().get_current_freq(0)})), objective_(get_default_objective()), active_(true), localities_(hpx::find_all_localities()), - f_resource_max(-1.0f), f_resource_leeway(-1.0f) + f_resource_max(-1.0f), f_resource_leeway(-1.0f), + nmd(0.005), + nmd_initialized(0), + nodes_min(1), nodes_max(localities_.size()), threads_min(0), threads_max(0) { char *const c_policy = std::getenv("ALLSCALE_SCHEDULING_POLICY"); + previous_num_nodes = localities_.size(); - if (c_policy && strncasecmp(c_policy, "ino", 3) == 0 ) + if (c_policy && strcasecmp(c_policy, "ino") == 0 ) { char *const c_resource_max = std::getenv("ALLSCALE_RESOURCE_MAX"); char *const c_resource_leeway = std::getenv("ALLSCALE_RESOURCE_LEEWAY"); @@ -207,20 +213,30 @@ global_optimizer::global_optimizer() f_resource_max = 0.75f; else f_resource_max = atof(c_resource_max); + + nodes_min = f_resource_leeway * localities_.size(); + nodes_max = localities_.size(); + + if ( nodes_min < 1 ) + nodes_min = 1; + } + if ( c_policy && strcasecmp(c_policy, "ino")) o_ino = allscale::components::internode_optimizer_t(localities_.size(), (double) f_resource_max, (double) f_resource_leeway, INO_DEFAULT_FORGET_AFTER); + + if ( c_policy && strcasecmp(c_policy, "ino_nmd")) { + char *const c_threads_min = std::getenv("ALLSCALE_GINO_THREADS_MIN"); + char *const c_threads_max = std::getenv("ALLSCALE_GINO_THREADS_MAX"); + + if ( c_threads_min ) + threads_min = atoi(c_threads_min); + + if ( c_threads_max ) + threads_max = atoi(c_threads_max); } -// else if ( strncasecmp(c_policy, "truly_random", 
12) == 0 ) { -// char *const c_balance_every = std::getenv("ALLSCALE_TRULY_RANDOM_BALANCE_EVERY"); -// -// if ( c_balance_every ) { -// u_balance_every = (std::size_t) atoi(c_balance_every); -// u_steps_till_rebalance = u_balance_every; -// } -// } } void global_optimizer::tune(std::vector const &state) @@ -419,6 +435,267 @@ hpx::future global_optimizer::decide_random_mapping(const std::vector global_optimizer::balance_ino_nmd(const std::vector &old_mapping) +{ + u_steps_till_rebalance = u_balance_every; + return hpx::lcos::broadcast(localities_) + .then( + [this, old_mapping](hpx::future > future_state) { + std::lock_guard l(mtx_); + + auto state = future_state.get(); + float avg_time = 0; + float avg_energy = 0; + float avg_threads = 0; + int from_node = 0; + + std::size_t num_avg_time = 0ul; + + for (const auto &s:state) { + if ( s.avg_time_ > 0.0) { + avg_time += s.avg_time_; + num_avg_time ++; + } + avg_energy += s.energy_; + avg_threads += s.active_cores_per_node_ / (float) s.cores_per_node_; + std::cout << "From " << from_node + << " t:" << s.avg_time_ + << " e:" << s.energy_ + << " h:" << s.active_cores_per_node_ / (float) s.cores_per_node_ + << " (" << s.active_cores_per_node_ << ", " + < " + << constraint_max[0] << " and " + << constraint_min[1] << " -> " + << constraint_max[1] << std::endl; + } + nmd.initialize_simplex(weights, + nullptr, + constraint_min, + constraint_max); + + nmd_initialized = 1; + } + + auto action = nmd.step(measurements); + // VV: Todo do something with the action + // assume that .threads = nodes and .freq_idx = threads per node + int new_num_nodes = action.threads; + int new_threads_per_node = action.freq_idx; + + if ( new_num_nodes != previous_num_nodes ) { + // VV: Need to redistribute tasks to nodes. 
+ // Try to move as few as possible tasks + /* VV: Balancing algorithm: + new_avg_tasks = ceil(total_tasks / new_num_nodes) + node_to_tasks{} = find out which tasks each node is computing() + + if ( new_num_nodes < previous_nodes ) { + // VV: Evenly distribute all now orphaned tasks to remaining nodes + orphaned_tasks = those which were running on the now unused nodes + for ( node:new_used_nodes ) { + old_tasks = size(node_to_tasks[node]) + added_to_node = 0; + while (remaining_orphaned + && added_to_node < new_avg_tasks-old_tasks) { + orphan = orphaned.pop() + node.tasks.push_back(orphan) + added_to_node ++; + } + } + } else if ( new_num_nodes > previous_node ) { + num_need_to_move = new_avg_tasks; + node_to_move = previous_nodes; + + // VV: Redistribute last tasks from overflowed nodes to new ones + while ( num_need_to_move > 0 && node_to_move < new_num_nodes ) { + for ( node:new_used_nodes ) { + if ( num_need_to_move == 0 ) { + if ( node_to_move < new_num_nodes) { + node_to_move ++; + num_need_to_move = new_avg_tasks; + } else { + break; + } + } + + task = node.tasks[-1] + node_to_tasks[node_to_move].tasks.push_back(task) + num_need_to_move -- + } + } + } + */ + auto new_avg_tasks = (std::size_t) std::ceil(old_mapping.size()/ + (float)new_num_nodes); + auto new_mapping = std::vector(old_mapping.size(), 0ul); + auto node_to_tasks = std::map >(); + // VV: node_to_tasks maps node id to list of tasks that it's running + std::size_t task_id = 0; + std::size_t num_active_nodes = std::count(active_nodes_.begin(), active_nodes_.end(), true); + + for (auto i=0ul; i())); + + for ( const auto &node_id:old_mapping ) + node_to_tasks[node_id].push_back(task_id++); + + + std::cout << "[GLOBAL OPTIMIZER] Rebalancing (original):" << std::endl; + + for ( const auto &node: node_to_tasks ) { + std::cout << "node " << node.first << ": "; + for ( const auto &task:node.second) + std::cout << " " << task; + std::cout << std::endl; + } + + // VV: Something else is setting the scheduling 
policy too + // try to redistribute tasks to all @previous_num_nodes + + std::cout << "[GLOBAL OPTIMIZER] Re-balancing previous nodes" << std::endl; + + auto prev_avg_tasks = + (std::size_t) std::ceil(old_mapping.size() / + (float)previous_num_nodes); + auto node_fewer_tasks = 1ul; + + for (auto node_id = 0ul; node_id < previous_num_nodes; ++node_id) + { + auto &node = node_to_tasks[node_id]; + while (node.size() > prev_avg_tasks) + { + while (node_to_tasks[node_fewer_tasks].size() >= prev_avg_tasks) + if (++node_fewer_tasks == previous_num_nodes) + break; + + if (node_fewer_tasks == previous_num_nodes) + break; + + auto task = node.back(); + node.pop_back(); + node_to_tasks[node_fewer_tasks].push_back(task); + } + } + + std::cout << "[GLOBAL OPTIMIZER] Rebalanced (still original):" << std::endl; + + for ( const auto &node: node_to_tasks ) { + std::cout << "node " << node.first << ": "; + for ( const auto &task:node.second) + std::cout << " " << task; + std::cout << std::endl; + } + + + std::cout << "[GLOBAL OPTIMIZER] Changing nodes from " + << previous_num_nodes + << " to " << new_num_nodes << std::endl; + + if (new_num_nodes < previous_num_nodes) + { + std::cout << "[GLOBAL OPTIMIZER] Decreasing nodes" << std::endl; + auto lost_node = new_num_nodes; + + while (lost_node < previous_num_nodes && node_to_tasks[lost_node].size()) + { + for (auto node_id = 0ul; node_id < new_num_nodes; ++node_id) + { + auto &node = node_to_tasks[node_id]; + auto old_tasks = node.size(); + for (auto new_tasks = old_tasks; + lost_node < previous_num_nodes && new_tasks < new_avg_tasks; + new_tasks++) + { + // VV: Move next orphaned task to @node + while (node_to_tasks[lost_node].size() == 0) + { + if (++lost_node == previous_num_nodes) + break; + } + + if (lost_node == previous_num_nodes) + break; + + std::size_t task = node_to_tasks[lost_node].back(); + node_to_tasks[lost_node].pop_back(); + node.push_back(task); + } + } + } + } + else if (new_num_nodes > previous_num_nodes) + { + 
std::cout << "[GLOBAL OPTIMIZER] Increasing nodes" << std::endl; + auto new_node = previous_num_nodes - 1; + for (auto node_id = 0ul; node_id < previous_num_nodes; ++node_id) + { + auto &node = node_to_tasks[node_id]; + while (node.size() > new_avg_tasks) + { + while (node_to_tasks[new_node].size() >= new_avg_tasks) + if (++new_node == new_num_nodes) + break; + + if (new_node == new_num_nodes) + break; + + auto task = node.back(); + node.pop_back(); + node_to_tasks[new_node].push_back(task); + } + } + } + else + { + std::cout << "[GLOBAL OPTIMIZER] Did not modify mapping" << std::endl; + } + + if (previous_num_nodes != new_num_nodes ){ + { + std::cout << "[GLOBAL OPTIMIZER] Rebalancing (NEW):" << std::endl; + + for ( const auto &node: node_to_tasks ) { + std::cout << "node " << node.first << ": "; + for ( const auto &task:node.second) + std::cout << " " << task; + std::cout << std::endl; + } + + } + previous_num_nodes = new_num_nodes; + hpx::lcos::broadcast_apply(localities_, new_mapping); + } + } + }); +} + hpx::future global_optimizer::balance_ino(const std::vector &old_mapping) { /*VV: Compute the new ino_knobs (i.e. 
number of Nodes), then assign tasks to diff --git a/src/scheduler.cpp b/src/scheduler.cpp index d88d568..6a63a0a 100644 --- a/src/scheduler.cpp +++ b/src/scheduler.cpp @@ -175,6 +175,7 @@ namespace allscale * ALLSCALE_RESOURCE_LEEWAY = (0.0, 1.0) // extra percentage allowed to explore */ ino, + ino_nmd, random, truly_random }; @@ -194,6 +195,8 @@ namespace allscale return "tuned"; case ino: return "ino"; + case ino_nmd: + return "ino_nmd"; case random: return "random"; case truly_random: @@ -234,6 +237,12 @@ namespace allscale tree_scheduling_policy::create_uniform(allscale::get_num_localities()) }; } + if (policy == "ino_nmd" ) { + return { + replacable_policy::ino_nmd, + tree_scheduling_policy::create_uniform(allscale::get_num_localities()) + }; + } if (policy == "truly_random") { return { @@ -343,8 +352,8 @@ namespace allscale void apply_new_mapping(const std::vector &new_mapping) { std::lock_guard l(mtx_); - policy_.policy_ = tree_scheduling_policy::from_mapping(*policy_.policy_, - new_mapping); + policy_.policy_ = + tree_scheduling_policy::from_mapping(*policy_.policy_, new_mapping); } void toggle_node(std::size_t locality_id) @@ -493,6 +502,11 @@ namespace allscale tree_scheduling_policy const& old = static_cast(*policy_.policy_); optimizer_.balance_ino(old.task_distribution_mapping()); } + + if ( policy_.value_ == replacable_policy::ino_nmd) { + tree_scheduling_policy const& old = static_cast(*policy_.policy_); + optimizer_.balance_ino_nmd(old.task_distribution_mapping()); + } if (policy_.value_ == replacable_policy::truly_random) { tree_scheduling_policy const& old = static_cast(*policy_.policy_); @@ -512,7 +526,7 @@ namespace allscale void schedule(work_item work) { - if (is_root_ && work.id().is_root() && work.id().id % 20 == 0) + if (is_root_ && work.id().is_root() && work.id().id % 5 == 0) { balance(); } From f1ceffcb9bd6ee90da54cf38b9890a0f076935d3 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Mon, 19 Nov 2018 14:08:10 +0000 Subject: [PATCH 
17/37] Take into account that nodes might have died Added a virtual to physical node dictionary. INO_NMD assumes that nodes are in sequential order while it is making decision on which nodes to use but once it has made its choices it makes sure that it uses only nodes which are working --- src/components/scheduler_component.cpp | 6 +++++- src/optimizer.cpp | 19 ++++++++++++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/components/scheduler_component.cpp b/src/components/scheduler_component.cpp index 0ef7413..ce516b1 100644 --- a/src/components/scheduler_component.cpp +++ b/src/components/scheduler_component.cpp @@ -1462,7 +1462,11 @@ void scheduler::stop() { update_power_consumption(measurement, dt_power); } #endif - + if ( meas_active_threads_count == 0 ) + meas_active_threads_count = 1; + if ( meas_power_count == 0 ) + meas_power_count = 1; + std::cout << "\n****************************************************\n" << std::flush; std::cout << "Measured Metrics of Application Execution:\n" diff --git a/src/optimizer.cpp b/src/optimizer.cpp index 4fedd61..2fcc698 100644 --- a/src/optimizer.cpp +++ b/src/optimizer.cpp @@ -586,7 +586,7 @@ hpx::future global_optimizer::balance_ino_nmd(const std::vector prev_avg_tasks) @@ -689,6 +689,23 @@ hpx::future global_optimizer::balance_ino_nmd(const std::vector(); + + std::size_t cur_node = 0ul; + + for (const auto &physical:active_nodes_) { + if ( physical ) { + std::cout << "Node " << cur_node << " is alive!" 
<< std::endl; + virtual_to_physical.push_back(cur_node); + } + cur_node ++; + } + + for (auto i = 0ul; i< new_mapping.size(); ++i) + new_mapping[i] = virtual_to_physical[new_mapping[i]]; + previous_num_nodes = new_num_nodes; hpx::lcos::broadcast_apply(localities_, new_mapping); } From eb457cce0e7c49d3c6c876ac343cadf0d64d24f5 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Mon, 19 Nov 2018 15:31:34 +0000 Subject: [PATCH 18/37] Generalize a bit NMD - Though it still uses "threads" and "freq_idx" as knob names the only restriction is that these values must be discrete (i.e. integers) - Expects that step() contains the actual measured values of "the knob set". - For example, the scheduler may not always manage to resume/suspend the number of threads that the local-optimizer suggests - Added a maximum region that can be searched for new knob_set alternatives so that the spirit of the optimization process is kept sort of intact --- allscale/components/localoptimizer.hpp | 2 +- allscale/components/nmsimplex_bbincr.hpp | 15 +- src/components/localoptimizer.cpp | 18 ++- src/components/nmsimplex_bbincr.cpp | 181 ++++++++++++++++------- src/components/scheduler_component.cpp | 2 +- src/optimizer.cpp | 43 +++--- 6 files changed, 181 insertions(+), 80 deletions(-) diff --git a/allscale/components/localoptimizer.hpp b/allscale/components/localoptimizer.hpp index 722520c..a26db1f 100644 --- a/allscale/components/localoptimizer.hpp +++ b/allscale/components/localoptimizer.hpp @@ -158,7 +158,7 @@ struct localoptimizer void setmaxthreads(std::size_t threads); /* executes one step of multi-objective optimization */ - actuation step(); + actuation step(std::size_t active_threads); /* adds a measurement sample to the specified objective */ void measureObjective(double iter_time, double power, double threads); diff --git a/allscale/components/nmsimplex_bbincr.hpp b/allscale/components/nmsimplex_bbincr.hpp index 8ed77dc..81704c3 100644 ---
a/allscale/components/nmsimplex_bbincr.hpp +++ b/allscale/components/nmsimplex_bbincr.hpp @@ -129,7 +129,8 @@ class NelderMead double evaluate_score(const double objectives[], const double *weights); void set_weights(const double weights[]); - optstepresult step(const double objectives[]); + optstepresult step(const double objectives[], + double knob1, double knob2); private: int warming_up_step; @@ -148,10 +149,14 @@ class NelderMead MapCache_t cache_; optstepresult do_step_start(); - optstepresult do_step_reflect(const double objectives[]); - optstepresult do_step_expand(const double objectives[]); - optstepresult do_step_contract(const double objectives[]); - optstepresult do_step_shrink(const double objectives[]); + optstepresult do_step_reflect(const double objectives[], + double knob1, double knob2); + optstepresult do_step_expand(const double objectives[], + double knob1, double knob2); + optstepresult do_step_contract(const double objectives[], + double knob1, double knob2); + optstepresult do_step_shrink(const double objectives[], + double knob1, double knob2); void sort_vertices(void); void my_constraints(double *); diff --git a/src/components/localoptimizer.cpp b/src/components/localoptimizer.cpp index f0a36d4..f70f76c 100644 --- a/src/components/localoptimizer.cpp +++ b/src/components/localoptimizer.cpp @@ -203,9 +203,11 @@ void localoptimizer::reset_accumulated_measurements() pending_num_times = 0; } -actuation localoptimizer::step() +actuation localoptimizer::step(std::size_t active_threads) { actuation act; + // VV: Possibly amend erroneous information + threads_param_ = active_threads; act.threads = threads_param_; #ifdef ALLSCALE_HAVE_CPUFREQ act.frequency_idx = frequency_param_; @@ -232,7 +234,14 @@ actuation localoptimizer::step() reset_accumulated_measurements(); if ( explore_knob_domain ){ - optstepresult nmd_res = nmd.step(latest_measurements); + optstepresult nmd_res = nmd.step(latest_measurements, + active_threads, +#ifdef 
ALLSCALE_HAVE_CPUFREQ + frequency_param_ +#else + 0 +#endif + ); #ifdef DEBUG_MULTIOBJECTIVE_ std::cout << "[LOCALOPTIMIZER|DEBUG] New Vertex to try:"; @@ -291,6 +300,11 @@ actuation localoptimizer::step() else if (act.frequency_idx > frequencies_param_allowed_.size() - 1) act.frequency_idx = frequencies_param_allowed_.size() - 1; #endif + + threads_param_ = act.threads; +#ifdef ALLSCALE_HAVE_CPUFREQ + frequency_param_ = act.frequency_idx; +#endif return act; } } // namespace components diff --git a/src/components/nmsimplex_bbincr.cpp b/src/components/nmsimplex_bbincr.cpp index 9f42307..7fb76b5 100644 --- a/src/components/nmsimplex_bbincr.cpp +++ b/src/components/nmsimplex_bbincr.cpp @@ -11,6 +11,8 @@ */ #include +#include + #define NMD_DEBUG_ 1 #define NMD_INFO_ 1 @@ -183,6 +185,9 @@ void NelderMead::generate_new(F &gen) max_nested_level *=2; auto timestamp_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + // VV: Restrict search-grid to a maximum block of 5x5 + int retries = 0; + const int retries_threshold = 5*5; int is_same; do { @@ -197,10 +202,12 @@ void NelderMead::generate_new(F &gen) auto dt = timestamp_now - entry->second._cache_timestamp; is_same = dt <= entry->second._cache_expires_dt; } - + + ++ retries; if ( ( level < max_level +1) && is_same - && max_combinations > (NMD_NUM_KNOBS + 1)) + && max_combinations > (NMD_NUM_KNOBS + 1) + && retries < retries_threshold ) { # if 0 extra[0] = rand() % (int)(constraint_max[0] - constraint_min[0]) @@ -219,12 +226,20 @@ void NelderMead::generate_new(F &gen) #endif OUT_DEBUG( std::cout << "[NelderMead|Debug] Rejecting " - << new_set[0] << " " << new_set[1] << std::endl; + << new_set[0] << " " << new_set[1] + << " will try offset " << extra[0] << " " << extra[1] << std::endl; ) } else { break; } } while ( 1 ); + + if ( retries >= retries_threshold ) { + extra[0] = 0; + extra[1] = 0; + + gen(extra); + } } void NelderMead::my_constraints(double x[]) @@ -312,7 +327,7 @@ 
double NelderMead::evaluate_score(const double objectives[], const double *weigh #else score = 1.0; for ( auto i=0; isecond._cache_expires_dt) { - return do_step_reflect(entry->second.objectives); + return do_step_reflect(entry->second.objectives, + entry->second.threads, + entry->second.freq_idx); } } return res; } -optstepresult NelderMead::do_step_reflect(const double objectives[]) +optstepresult NelderMead::do_step_reflect(const double objectives[], + double knob1, double knob2) { optstepresult res; #ifdef NMD_DEBUG_ std::cout << "[NelderMead DEBUG] State = Reflection" << std::endl; #endif // VV: Make sure that we actually profiled what we meant to - int profiled_threads = objectives[2]; - - if ( (int) vr[0] != profiled_threads ) { - std::cout << "[NelderMead|WARN] Meant to profile " << vr[0] << " threads " - "but ended up using " << profiled_threads << std::endl; + double profiled[] = {knob1, knob2}; + my_constraints(profiled); + + if ( vr[0] != profiled[0] || vr[1] != profiled[1] ) { + std::cout << "[NelderMead|WARN] Meant to profile " << vr[0] << " knob1 " + "but ended up using " << profiled[0] << std::endl; + std::cout << "[NelderMead|WARN] Meant to profile " << vr[1] << " knob2 " + "but ended up using " << profiled[1] << std::endl; auto key = std::make_pair((int)vr[0], (int)vr[1]); auto iter = cache_.find(key); if ( iter != cache_.end() ) { - iter->second.threads = profiled_threads; + iter->second.threads = profiled[0]; + iter->second.freq_idx = profiled[1]; } - vr[0] = profiled_threads; + vr[0] = profiled[0]; + vr[1] = profiled[1]; + + cache_update((int)vr[0], (int)vr[1], objectives, true); } fr = evaluate_score(objectives, opt_weights); @@ -695,7 +720,9 @@ optstepresult NelderMead::do_step_reflect(const double objectives[]) if (dt < entry->second._cache_expires_dt) { - return do_step_expand(entry->second.objectives); + return do_step_expand(entry->second.objectives, + entry->second.threads, + entry->second.freq_idx); } } @@ -732,7 +759,9 @@ optstepresult 
NelderMead::do_step_reflect(const double objectives[]) if (dt < entry->second._cache_expires_dt) { - return do_step_contract(entry->second.objectives); + return do_step_contract(entry->second.objectives, + entry->second.threads, + entry->second.freq_idx); } } @@ -768,7 +797,9 @@ optstepresult NelderMead::do_step_reflect(const double objectives[]) if (dt < entry->second._cache_expires_dt) { - return do_step_contract(entry->second.objectives); + return do_step_contract(entry->second.objectives, + entry->second.threads, + entry->second.freq_idx); } } @@ -776,27 +807,34 @@ optstepresult NelderMead::do_step_reflect(const double objectives[]) } } -optstepresult NelderMead::do_step_expand(const double objectives[]) +optstepresult NelderMead::do_step_expand(const double objectives[], + double knob1, double knob2) { #ifdef NMD_DEBUG_ std::cout << "[NelderMead DEBUG] State = Expansion" << std::endl; #endif fe = evaluate_score(objectives, nullptr); - // VV: Make sure that we actually profiled what we meant to - int profiled_threads = objectives[2]; + double profiled[] = {knob1, knob2}; + my_constraints(profiled); - if ( (int) ve[0] != profiled_threads ) { - std::cout << "[NelderMead|WARN] Meant to profile " << ve[0] << " threads " - "but ended up using " << profiled_threads << std::endl; + if ( ve[0] != profiled[0] || ve[1] != profiled[1] ) { + std::cout << "[NelderMead|WARN] Meant to profile expand " << ve[0] << " knob1 " + "but ended up using " << profiled[0] << std::endl; + std::cout << "[NelderMead|WARN] Meant to profile expand " << ve[1] << " knob2 " + "but ended up using " << profiled[1] << std::endl; auto key = std::make_pair((int)ve[0], (int)ve[1]); auto iter = cache_.find(key); if ( iter != cache_.end() ) { - iter->second.threads = profiled_threads; + iter->second.threads = profiled[0]; + iter->second.freq_idx = profiled[1]; } - ve[0] = profiled_threads; + ve[0] = profiled[0]; + ve[1] = profiled[1]; + + cache_update((int)ve[0], (int)ve[1], objectives, true); } if (fe 
< fr) @@ -827,7 +865,8 @@ optstepresult NelderMead::do_step_expand(const double objectives[]) return do_step_start(); } -optstepresult NelderMead::do_step_contract(const double objectives[]) +optstepresult NelderMead::do_step_contract(const double objectives[], + double knob1, double knob2) { int j; #ifdef NMD_DEBUG_ @@ -835,20 +874,26 @@ optstepresult NelderMead::do_step_contract(const double objectives[]) #endif fc = evaluate_score(objectives, nullptr); - // VV: Make sure that we actually profiled what we meant to - int profiled_threads = objectives[2]; + double profiled[] = {knob1, knob2}; + my_constraints(profiled); - if ( (int) vc[0] != profiled_threads ) { - std::cout << "[NelderMead|WARN] Meant to profile " << vc[0] << " threads " - "but ended up using " << profiled_threads << std::endl; + if ( vc[0] != profiled[0] || vc[1] != profiled[1] ) { + std::cout << "[NelderMead|WARN] Meant to profile contract " << vc[0] << " knob1 " + "but ended up using " << profiled[0] << std::endl; + std::cout << "[NelderMead|WARN] Meant to profile contract " << vc[1] << " knob2 " + "but ended up using " << profiled[1] << std::endl; auto key = std::make_pair((int)vc[0], (int)vc[1]); auto iter = cache_.find(key); if ( iter != cache_.end() ) { - iter->second.threads = profiled_threads; + iter->second.threads = profiled[0]; + iter->second.freq_idx = profiled[1]; } - vc[0] = profiled_threads; + vc[0] = profiled[0]; + vc[1] = profiled[1]; + + cache_update((int)vc[0], (int)vc[1], objectives, true); } if (fc <= fr) @@ -904,7 +949,9 @@ optstepresult NelderMead::do_step_contract(const double objectives[]) if (dt < entry->second._cache_expires_dt) { - return do_step_shrink(entry->second.objectives); + return do_step_shrink(entry->second.objectives, + entry->second.threads, + entry->second.freq_idx); } } @@ -912,21 +959,34 @@ optstepresult NelderMead::do_step_contract(const double objectives[]) } } -optstepresult NelderMead::do_step_shrink(const double objectives[]) +optstepresult 
NelderMead::do_step_shrink(const double objectives[], + double knob1, double knob2) { #ifdef NMD_DEBUG_ std::cout << "[NelderMead|DEBUG] State = Shrink" << std::endl; #endif f[vh] = evaluate_score(objectives, nullptr); - // VV: Make sure that we actually profiled what we meant to - int profiled_threads = objectives[2]; + double profiled[] = {knob1, knob2}; + my_constraints(profiled); - if ( (int) v[vh][0] != profiled_threads ) { - std::cout << "[NelderMead|WARN] Meant to profile " << v[vh][0] << " threads " - "but ended up using " << profiled_threads << std::endl; + if ( v[vh][0] != profiled[0] || v[vh][1] != profiled[1] ) { + std::cout << "[NelderMead|WARN] Meant to profile shrink " << v[vh][0] << " knob1 " + "but ended up using " << profiled[0] << std::endl; + std::cout << "[NelderMead|WARN] Meant to profile shrink " << v[vh][1] << " knob2 " + "but ended up using " << profiled[1] << std::endl; - v[vh][0] = profiled_threads; + auto key = std::make_pair((int)v[vh][0], (int)v[vh][1]); + auto iter = cache_.find(key); + if ( iter != cache_.end() ) { + iter->second.threads = profiled[0]; + iter->second.freq_idx = profiled[1]; + } + + v[vh][0] = profiled[0]; + v[vh][1] = profiled[1]; + + cache_update((int)v[vh][0], (int)v[vh][1], objectives, true); } const int threads = (int)(v[vh][0]); @@ -937,7 +997,8 @@ optstepresult NelderMead::do_step_shrink(const double objectives[]) return do_step_start(); } -optstepresult NelderMead::step(const double objectives[]) +optstepresult NelderMead::step(const double objectives[], + double knob1, double knob2) { int i, j; @@ -987,25 +1048,37 @@ optstepresult NelderMead::step(const double objectives[]) ) // VV: Make sure that we actually profiled what we meant to - int profiled_threads = objectives[2]; - if ( warming_up_step > 0 && warming_up_step <= NMD_NUM_KNOBS + 1) { - if ( (int) v[warming_up_step-1][0] != profiled_threads ) { - std::cout << "[NelderMead|WARN] Meant to profile " - << v[warming_up_step-1][0] << " threads " - "but 
ended up using " << profiled_threads << std::endl; - v[warming_up_step-1][0] = profiled_threads; + double profiled[] = {knob1, knob2}; + my_constraints(profiled); + + if ( v[warming_up_step-1][0] != profiled[0] || v[warming_up_step-1][1] != profiled[1] ) { + std::cout << "[NelderMead|WARN] Meant to profile expand " << v[warming_up_step-1][0] << " knob1 " + "but ended up using " << profiled[0] << std::endl; + std::cout << "[NelderMead|WARN] Meant to profile expand " << v[warming_up_step-1][1] << " knob2 " + "but ended up using " << profiled[1] << std::endl; + + auto key = std::make_pair((int)v[warming_up_step-1][0], (int)v[warming_up_step-1][1]); + auto iter = cache_.find(key); + if ( iter != cache_.end() ) { + iter->second.threads = profiled[0]; + iter->second.freq_idx = profiled[1]; + } + + v[warming_up_step-1][0] = profiled[0]; + v[warming_up_step-1][1] = profiled[1]; } + // VV: Record results of last warming up step f[warming_up_step-1] = evaluate_score(objectives, nullptr); - cache_update(profiled_threads, v[warming_up_step-1][1], + cache_update(v[warming_up_step-1][0], v[warming_up_step-1][1], objectives, true); } if ( warming_up_step == NMD_NUM_KNOBS + 1) { // VV: We need not explore the knob_set space anymore state_ = start; - return step(objectives); + return step(objectives, knob1, knob2); } else if (warming_up_step > NMD_NUM_KNOBS + 1) { std::cout << "[NelderMead|Warn] Unknown warmup step " << warming_up_step << std::endl; } @@ -1031,16 +1104,16 @@ optstepresult NelderMead::step(const double objectives[]) res = do_step_start(); break; case reflection: - res = do_step_reflect(objectives); + res = do_step_reflect(objectives, knob1, knob2); break; case expansion: - res = do_step_expand(objectives); + res = do_step_expand(objectives, knob1, knob2); break; case contraction: - res = do_step_contract(objectives); + res = do_step_contract(objectives, knob1, knob2); break; case shrink: - res = do_step_shrink(objectives); + res = do_step_shrink(objectives, knob1, 
knob2); break; default: std::cout << "Unknown NelderMead state (" << state_ << ")" << std::endl; diff --git a/src/components/scheduler_component.cpp b/src/components/scheduler_component.cpp index ce516b1..836e465 100644 --- a/src/components/scheduler_component.cpp +++ b/src/components/scheduler_component.cpp @@ -713,7 +713,7 @@ void scheduler::optimize_locally(work_item const& work) if (elapsedTimeMs > optimization_period_ms || nr_opt_steps == 0){ last_optimization_timestamp_= t_duration_now; nr_opt_steps++; - actuation act_temp = lopt_.step(); + actuation act_temp = lopt_.step(active_threads); #ifdef DEBUG_MULTIOBJECTIVE_ lopt_.printverbosesteps(act_temp); #endif diff --git a/src/optimizer.cpp b/src/optimizer.cpp index 2fcc698..8567870 100644 --- a/src/optimizer.cpp +++ b/src/optimizer.cpp @@ -37,7 +37,7 @@ namespace allscale my_time = -1.f; allscale::components::monitor *monitor_c = &allscale::monitor::get(); - float power_now = 100.f; + float power_now = 0.001f; #if defined(POWER_ESTIMATE) || defined(ALLSCALE_HAVE_CPUFREQ) power_now = monitor_c->get_current_power() / monitor_c->get_max_power(); #endif @@ -503,7 +503,9 @@ hpx::future global_optimizer::balance_ino_nmd(const std::vector global_optimizer::balance_ino_nmd(const std::vector(); + + std::size_t cur_node = 0ul; + + for (const auto &physical:active_nodes_) { + if ( physical ) { + std::cout << "Node " << cur_node << " is alive!" 
<< std::endl; + virtual_to_physical.push_back(cur_node); + } + cur_node ++; + } + + if ( new_num_nodes > num_active_nodes ) + new_num_nodes = num_active_nodes; + + if ( previous_num_nodes > num_active_nodes ) + previous_num_nodes = num_active_nodes; + auto new_avg_tasks = (std::size_t) std::ceil(old_mapping.size()/ (float)new_num_nodes); auto new_mapping = std::vector(old_mapping.size(), 0ul); auto node_to_tasks = std::map >(); // VV: node_to_tasks maps node id to list of tasks that it's running std::size_t task_id = 0; - std::size_t num_active_nodes = std::count(active_nodes_.begin(), active_nodes_.end(), true); + for (auto i=0ul; i())); @@ -689,20 +712,6 @@ hpx::future global_optimizer::balance_ino_nmd(const std::vector(); - - std::size_t cur_node = 0ul; - - for (const auto &physical:active_nodes_) { - if ( physical ) { - std::cout << "Node " << cur_node << " is alive!" << std::endl; - virtual_to_physical.push_back(cur_node); - } - cur_node ++; - } - for (auto i = 0ul; i< new_mapping.size(); ++i) new_mapping[i] = virtual_to_physical[new_mapping[i]]; From de28905df5d4b18a453641a603db79b0fc40068f Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Mon, 19 Nov 2018 16:09:08 +0000 Subject: [PATCH 19/37] Improved INO_NMD and dashboard integration --- allscale/optimizer.hpp | 2 ++ src/dashboard.cpp | 7 +++++-- src/optimizer.cpp | 14 ++++++++++++++ src/scheduler.cpp | 5 +++++ 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/allscale/optimizer.hpp b/allscale/optimizer.hpp index 7f019fa..4452bea 100644 --- a/allscale/optimizer.hpp +++ b/allscale/optimizer.hpp @@ -110,6 +110,8 @@ namespace allscale { hpx::future balance_ino(const std::vector &old_mapping); hpx::future balance_ino_nmd(const std::vector &old_mapping); hpx::future decide_random_mapping(const std::vector &old_mapping); + + void signal_objective_changed(); bool may_rebalance(); diff --git a/src/dashboard.cpp b/src/dashboard.cpp index fcf7e8b..6643e1b 100644 --- a/src/dashboard.cpp +++ 
b/src/dashboard.cpp @@ -23,6 +23,9 @@ #include +// VV: Define this to use time/energy/resources instead of speed/energy/efficiency +#define ALTERNATIVE_SCORE + namespace allscale { namespace dashboard { node_state get_state() @@ -57,7 +60,7 @@ namespace allscale { namespace dashboard state.productive_cycles_per_second = float(state.cur_frequency) * (1.f - state.idle_rate); // freq to Hz -#ifdef ALLSCALE_HAVE_CPUFREQ +#if defined(ALLSCALE_HAVE_CPUFREQ) || defined(ALTERNATIVE_SCORE) state.speed = monitor_c->get_avg_time_last_iterations(100); state.efficiency = active_cores; #else @@ -168,7 +171,7 @@ namespace allscale { namespace dashboard float system_state::score() const { -#ifdef ALLSCALE_HAVE_CPUFREQ +#if defined(ALLSCALE_HAVE_CPUFREQ) || defined(ALTERNATIVE_SCORE) return std::exp(speed * speed_exponent) * std::exp(efficiency * efficiency_exponent ) * std::exp(power * power_exponent); diff --git a/src/optimizer.cpp b/src/optimizer.cpp index 8567870..4311e4c 100644 --- a/src/optimizer.cpp +++ b/src/optimizer.cpp @@ -239,6 +239,20 @@ global_optimizer::global_optimizer() } } +void global_optimizer::signal_objective_changed() +{ + const double new_weights[3] = { + objective_.speed_exponent, + objective_.power_exponent, + objective_.efficiency_exponent + }; + + nmd.set_weights(new_weights); + + if ( nmd_initialized ) + nmd_initialized = 0; +} + void global_optimizer::tune(std::vector const &state) { allscale::components::monitor *monitor_c = &allscale::monitor::get(); diff --git a/src/scheduler.cpp b/src/scheduler.cpp index 6a63a0a..ab19eff 100644 --- a/src/scheduler.cpp +++ b/src/scheduler.cpp @@ -379,6 +379,8 @@ namespace allscale { std::lock_guard l(optimizer_.mtx_); optimizer_.objective_.speed_exponent = exp; + optimizer_.signal_objective_changed(); + double time_weight, energy_weight, resource_weight; auto &&local_scheduler = scheduler::get(); @@ -397,6 +399,8 @@ namespace allscale { std::lock_guard l(optimizer_.mtx_); optimizer_.objective_.efficiency_exponent = 
exp; + optimizer_.signal_objective_changed(); + double time_weight, energy_weight, resource_weight; auto &&local_scheduler = scheduler::get(); @@ -415,6 +419,7 @@ namespace allscale { std::lock_guard l(optimizer_.mtx_); optimizer_.objective_.power_exponent = exp; + optimizer_.signal_objective_changed(); double time_weight, energy_weight, resource_weight; auto &&local_scheduler = scheduler::get(); From d80618ba4a600801691c96e266fd711256e8b878 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Mon, 19 Nov 2018 16:53:26 +0000 Subject: [PATCH 20/37] Improved INO_NMD and dashboard integration --- src/dashboard.cpp | 7 +++- src/optimizer.cpp | 89 ++++++++++++++++++++++++++--------------------- 2 files changed, 56 insertions(+), 40 deletions(-) diff --git a/src/dashboard.cpp b/src/dashboard.cpp index 6643e1b..d02f98b 100644 --- a/src/dashboard.cpp +++ b/src/dashboard.cpp @@ -503,13 +503,18 @@ namespace allscale { namespace dashboard total_efficiency += cur.efficiency; cur_power += cur.cur_power; } + max_power += cur.max_power; } state.speed = total_speed / client.localities_.size(); // state.speed = std::pow(total_speed, 1.f/client.localities_.size()); - +#if defined(ALLSCALE_HAVE_CPUFREQ) || defined(ALTERNATIVE_SCORE) + // VV: This is the number of active threads + state.efficiency = total_efficiency; +#else state.efficiency = total_efficiency / client.localities_.size(); +#endif state.power = (max_power > 0) ? 
cur_power/max_power : 0; auto exponents = scheduler::get_optimizer_exponents(); diff --git a/src/optimizer.cpp b/src/optimizer.cpp index 4311e4c..924a9f3 100644 --- a/src/optimizer.cpp +++ b/src/optimizer.cpp @@ -26,6 +26,16 @@ #define TRULY_RANDOM_DEBUG +#define DEBUG_NMD_INO 1 + +#ifdef DEBUG_NMD_INO +#define OUT_DEBUG(X) X +#else +#define OUT_DEBUG(X) \ + { \ + } +#endif + namespace allscale { optimizer_state get_optimizer_state() @@ -472,12 +482,7 @@ hpx::future global_optimizer::balance_ino_nmd(const std::vector global_optimizer::balance_ino_nmd(const std::vector " - << constraint_max[0] << " and " - << constraint_min[1] << " -> " - << constraint_max[1] << std::endl; - } + nmd.initialize_simplex(weights, nullptr, constraint_min, @@ -577,7 +576,9 @@ hpx::future global_optimizer::balance_ino_nmd(const std::vector global_optimizer::balance_ino_nmd(const std::vector global_optimizer::balance_ino_nmd(const std::vector global_optimizer::balance_ino_nmd(const std::vector previous_num_nodes) { - std::cout << "[GLOBAL OPTIMIZER] Increasing nodes" << std::endl; + OUT_DEBUG( + std::cout << "[GLOBAL OPTIMIZER] Increasing nodes" << std::endl; + ) auto new_node = previous_num_nodes - 1; for (auto node_id = 0ul; node_id < previous_num_nodes; ++node_id) { @@ -711,11 +720,13 @@ hpx::future global_optimizer::balance_ino_nmd(const std::vector global_optimizer::balance_ino_nmd(const std::vector Date: Tue, 20 Nov 2018 11:38:50 +0000 Subject: [PATCH 21/37] ALLSCALE_HAVE_CPUFREQ determines whether CPUFREQ is available - We can set CPU governor, retrieve and set CPU frequency --- allscale/components/localoptimizer.hpp | 22 +- allscale/components/scheduler.hpp | 10 +- src/components/localoptimizer.cpp | 47 ++-- src/components/monitor_component.cpp | 8 +- src/components/scheduler_component.cpp | 359 ++++++++++--------------- src/dashboard.cpp | 12 +- 6 files changed, 170 insertions(+), 288 deletions(-) diff --git a/allscale/components/localoptimizer.hpp 
b/allscale/components/localoptimizer.hpp index a26db1f..1a04e9d 100644 --- a/allscale/components/localoptimizer.hpp +++ b/allscale/components/localoptimizer.hpp @@ -44,14 +44,7 @@ enum searchPolicy struct actuation { unsigned int threads; - -#if defined(ALLSCALE_HAVE_CPUFREQ) - /* index to the global cpu-supported frequencies vector pointing to - the new frequency to be set. If set to -1, frequency will stay - unchanged */ int frequency_idx; - int previous_frequency_idx; -#endif }; struct localoptimizer @@ -62,9 +55,7 @@ struct localoptimizer pending_time(0.), pending_num_times(0.), mo_initialized(false), -#if defined(ALLSCALE_HAVE_CPUFREQ) frequency_param_(0), -#endif converged_(false), convergence_threshold_(0.005), time_weight(0.0), @@ -87,9 +78,7 @@ struct localoptimizer << std::endl; #endif } -#ifdef ALLSCALE_HAVE_CPUFREQ void initialize_nmd(bool from_scratch); -#endif searchPolicy getPolicy() { return optmethod_; } // VV: Modifying the objectives triggers restarting the optimizer @@ -113,7 +102,7 @@ struct localoptimizer void setCurrentThreads(std::size_t threads) { threads_param_ = threads; } -#if defined(ALLSCALE_HAVE_CPUFREQ) + unsigned int getCurrentFrequencyIdx() { return frequency_param_; @@ -149,7 +138,7 @@ struct localoptimizer // std::cout << "***>>>> " << el << std::endl; return frequencies_param_allowed_; } -#endif + std::size_t getmaxthreads() { return max_threads_; @@ -226,19 +215,12 @@ struct localoptimizer /* maximum number of OS threads supported by the runtime */ std::size_t max_threads_; -#if defined(ALLSCALE_HAVE_CPUFREQ) /* active optimization parameter - current CPU frequency index */ unsigned int frequency_param_; - /* ordered set of frequency values that the CPU has been set to by - the optimization algorithm. 
The most recent value is stored at the - end of the vector */ - std::vector frequency_param_values_; - /* vector containing sorted list of frequencies supported by the processor */ std::vector frequencies_param_allowed_; -#endif /* threshold (percentage in [0,1]) to decide convergence of optimization steps */ diff --git a/allscale/components/scheduler.hpp b/allscale/components/scheduler.hpp index f728526..90d32e2 100644 --- a/allscale/components/scheduler.hpp +++ b/allscale/components/scheduler.hpp @@ -170,18 +170,12 @@ namespace allscale { namespace components { unsigned long long last_power_usage; unsigned long long power_sum; unsigned long long power_count; + #if defined(ALLSCALE_HAVE_CPUFREQ) cpufreq_policy policy; hardware_reconf::hw_topology topo; - std::vector cpu_freqs; - // Indices correspond to the freq id in cpu_freqs, and - // each pair holds energy usage and execution time - std::vector> freq_times; - - unsigned int freq_step; - bool target_freq_found; #endif - bool target_resource_found; + std::vector cpu_freqs; mutable mutex_type throttle_mtx_; mutable mutex_type resize_mtx_; diff --git a/src/components/localoptimizer.cpp b/src/components/localoptimizer.cpp index f70f76c..6676738 100644 --- a/src/components/localoptimizer.cpp +++ b/src/components/localoptimizer.cpp @@ -41,10 +41,6 @@ void localoptimizer::setobjectives(double time_weight, this->energy_weight = energy_weight; this->resource_weight = resource_weight; -#ifdef ALLSCALE_HAVE_CPUFREQ - setCurrentFrequencyIdx(0); -#endif - // VV: Modifying the objectives triggers restarting the optimizer // from scratch @@ -56,10 +52,8 @@ void localoptimizer::reset(int threads, int freq_idx) { threads_param_ = threads; thread_param_values_.clear(); -#ifdef ALLSCALE_HAVE_CPUFREQ + frequency_param_ = freq_idx; - frequency_param_values_.clear(); -#endif converged_ = false; }; @@ -104,14 +98,11 @@ void localoptimizer::printverbosesteps(actuation act) std::cout << "Allscale "; } std::cout << "Scheduler Step: 
Setting OS Threads to " << threads_param_; -#ifdef ALLSCALE_HAVE_CPUFREQ + if (act.frequency_idx >= 0) last_frequency_idx = act.frequency_idx; std::cout << " , CPU Frequency to " << frequencies_param_allowed_[last_frequency_idx] << std::endl; -#else - std::cout << std::endl; -#endif } void localoptimizer::accumulate_objective_measurements() @@ -150,14 +141,20 @@ void localoptimizer::setmaxthreads(std::size_t threads) #endif } -#ifdef ALLSCALE_HAVE_CPUFREQ + void localoptimizer::initialize_nmd(bool from_scratch) { // VV: Place constraints to #threads and cpu_freq tunable knobs double constraint_min[] = {1, 0}; + #if defined(ALLSCALE_HAVE_CPUFREQ) double constraint_max[] = {ceil(max_threads_/(double)threads_dt), (double)frequencies_param_allowed_.size() - 1}; + #else + std::cout << "Allowed frequencies: " << frequencies_param_allowed_.size() << std::endl; + double constraint_max[] = {ceil(max_threads_/(double)threads_dt), + 0.0}; + #endif const double opt_weights[] = { time_weight, energy_weight, resource_weight }; if( from_scratch == false ){ @@ -180,7 +177,6 @@ void localoptimizer::initialize_nmd(bool from_scratch) explore_knob_domain = true; converged_ = false; } -#endif void localoptimizer::measureObjective(double iter_time, double power, double threads) { @@ -209,18 +205,15 @@ actuation localoptimizer::step(std::size_t active_threads) // VV: Possibly amend erroneous information threads_param_ = active_threads; act.threads = threads_param_; -#ifdef ALLSCALE_HAVE_CPUFREQ + act.frequency_idx = frequency_param_; -#endif + /* random optimization step */ if (optmethod_ == random) { act.threads = (rand() % max_threads_); -#ifdef ALLSCALE_HAVE_CPUFREQ act.frequency_idx = rand() % frequencies_param_allowed_.size(); -#endif } -#ifdef ALLSCALE_HAVE_CPUFREQ else if (optmethod_ == allscale) { // VV: Keep track of dirty objectives @@ -236,12 +229,7 @@ actuation localoptimizer::step(std::size_t active_threads) if ( explore_knob_domain ){ optstepresult nmd_res = 
nmd.step(latest_measurements, active_threads, -#ifdef ALLSCALE_HAVE_CPUFREQ - frequency_param_ -#else - 0 -#endif - ); + frequency_param_); #ifdef DEBUG_MULTIOBJECTIVE_ std::cout << "[LOCALOPTIMIZER|DEBUG] New Vertex to try:"; @@ -262,6 +250,7 @@ actuation localoptimizer::step(std::size_t active_threads) #endif act.threads = minimization_point[0]; act.frequency_idx = minimization_point[1]; + // VV: Stop searching for new knob_set explore_knob_domain = false; converged_ = true; @@ -281,8 +270,6 @@ actuation localoptimizer::step(std::size_t active_threads) #endif } } -#endif // ALLSCALE_HAVE_CPUFREQ - validate_act: if (act.threads > max_threads_) @@ -293,18 +280,16 @@ actuation localoptimizer::step(std::size_t active_threads) { act.threads = getCurrentThreads(); } -#ifdef ALLSCALE_HAVE_CPUFREQ + // VV: If freq_idx is -1 then set it to last used frequency (frequency_param_) if (act.frequency_idx < 0) act.frequency_idx = frequency_param_; else if (act.frequency_idx > frequencies_param_allowed_.size() - 1) act.frequency_idx = frequencies_param_allowed_.size() - 1; -#endif - + threads_param_ = act.threads; -#ifdef ALLSCALE_HAVE_CPUFREQ frequency_param_ = act.frequency_idx; -#endif + return act; } } // namespace components diff --git a/src/components/monitor_component.cpp b/src/components/monitor_component.cpp index d1817ae..5ae6463 100644 --- a/src/components/monitor_component.cpp +++ b/src/components/monitor_component.cpp @@ -358,7 +358,7 @@ namespace allscale { namespace components { float monitor::get_current_power() { -#ifdef ALLSCALE_HAVE_CPUFREQ + #ifdef ALLSCALE_HAVE_CPUFREQ /*VV: Read potentially multiple measurements of power within the span of POWER_MEASUREMENT_PERIOD_MS milliseconds. 
Each time this function is invoked it returns the running average of power.*/ @@ -388,9 +388,9 @@ namespace allscale { namespace components { } return ret; -#else + #else return allscale::power::estimate_power(get_current_freq(0)) * num_cpus_; -#endif + #endif } @@ -406,7 +406,7 @@ namespace allscale { namespace components { #elif defined(POWER_ESTIMATE) return allscale::power::estimate_power(get_max_freq(0)) * num_cpus_; #else - return 0.0; + return 125.0; #endif } diff --git a/src/components/scheduler_component.cpp b/src/components/scheduler_component.cpp index 836e465..76ed483 100644 --- a/src/components/scheduler_component.cpp +++ b/src/components/scheduler_component.cpp @@ -50,14 +50,7 @@ scheduler::scheduler(std::uint64_t rank) current_power_usage(0), last_power_usage(0), power_sum(0), - power_count(0) - -#if defined(ALLSCALE_HAVE_CPUFREQ) - , - target_freq_found(false) -#endif - , - target_resource_found(false), + power_count(0), sampling_interval(10), current_avg_iter_time(0.0), multi_objectives(false), @@ -103,8 +96,9 @@ scheduler::scheduler(std::uint64_t rank) #endif #ifdef ALLSCALE_HAVE_CPUFREQ std::cout << "ALLSCALE_HAVE_CPUFREQ is defined" << std::endl << std::flush; +#else + std::cout << "ALLSCALE_HAVE_CPUFREQ is not defined. 
No real power measurements or CPU frequency scaling" << std::endl << std::flush; #endif - } /** @@ -229,15 +223,11 @@ void scheduler::init() { #ifdef DEBUG_MULTIOBJECTIVE_ std::cout << "[Local Optimizer|INFO] Optimization Policy Active = " << input_optpolicy_str << std::endl; #endif -#if ALLSCALE_HAVE_CPUFREQ - if (input_optpolicy_str=="allscale") { + if (input_optpolicy_str=="allscale") lopt_.setPolicy(allscale); - } - else -#endif - if (input_optpolicy_str=="random") + else if (input_optpolicy_str=="random") lopt_.setPolicy(random); - else if (input_optpolicy_str=="manual") + else if (input_optpolicy_str=="manual") lopt_.setPolicy(manual); else if ( input_optpolicy_str != "none" ) { HPX_THROW_EXCEPTION(hpx::bad_request, "scheduler::init", @@ -350,10 +340,9 @@ void scheduler::init() { executors_.emplace_back(pool_name); } -#if defined(ALLSCALE_HAVE_CPUFREQ) if (multi_objectives) { -#ifdef DEBUG_INIT_ + #ifdef DEBUG_INIT_ std::cout << "\n****************************************************\n" << std::flush; std::cout << "Policy selected: multi-objective set with time=" << time_weight << ", energy=" << energy_weight @@ -367,18 +356,16 @@ void scheduler::init() { "\tMulti-objective: " << multi_objectives << "\n" << std::flush; std::cout << "****************************************************\n" << std::flush; -#endif + #endif } if (energy_requested) initialize_cpu_frequencies(); -#ifdef MEASURE_MANUAL_ + #ifdef MEASURE_MANUAL_ if (manual_input_provided && input_objective_str.empty()) fix_allcores_frequencies(temp_idx); -#endif - -#endif + #endif initialized_ = true; #ifdef DEBUG_INIT_ @@ -398,7 +385,7 @@ void scheduler::init() { lopt_.setmaxthreads(os_thread_count); - #if defined(ALLSCALE_HAVE_CPUFREQ) + #if defined(ALLSCALE_HAVE_CPUFREQ) using hardware_reconf = allscale::components::util::hardware_reconf; auto freqs = hardware_reconf::get_frequencies(0); @@ -409,28 +396,18 @@ void scheduler::init() { } // VV: Set to max number of threads and max frequency 
lopt_.reset(os_thread_count, freqs.size()-1); -#else + #else // VV: Max number of threads, and an arbitrary frequency index lopt_.reset(os_thread_count,0); -#endif + #endif // VV: Set objectives after setting all constraints to // trigger the initialization of nmd lopt_.setobjectives(time_weight, energy_weight, resource_weight); -#ifdef DEBUG_ + #ifdef DEBUG_ lopt_.printobjectives(); -#endif + #endif } -#if defined(ALLSCALE_HAVE_CPUFREQ) -else { - /* - using hardware_reconf = allscale::components::util::hardware_reconf; - auto freqs = hardware_reconf::get_frequencies(0); - // VV: Set maximum frequency - fix_allcores_frequencies(freqs[freqs.size()-1]); - */ -} -#endif } /** @@ -442,16 +419,13 @@ else { * potential. * */ -void scheduler::initialize_cpu_frequencies() { #if defined(ALLSCALE_HAVE_CPUFREQ) +void scheduler::initialize_cpu_frequencies() +{ using hardware_reconf = allscale::components::util::hardware_reconf; cpu_freqs = hardware_reconf::get_frequencies(0); - freq_step = 8; // cpu_freqs.size() / 2; - freq_times.resize(cpu_freqs.size()); - -#ifdef MEASURE_ -#ifdef ALLSCALE_HAVE_CPUFREQ -#ifdef DEBUG_INIT_ + + #if defined(MEASURE_) && defined(DEBUG_INIT) unsigned long temp_transition_latency=hardware_reconf::get_cpu_transition_latency(1); if (temp_transition_latency==0) std::cout << "[INFO] Transition Latency Unavailable" << @@ -460,45 +434,37 @@ void scheduler::initialize_cpu_frequencies() { std::cout << "[INFO] Core-1 Frequency Transition Latency = " << hardware_reconf::get_cpu_transition_latency(2)/1000 << " milliseconds\n" << std::flush; -#endif -#endif -#endif -#ifdef DEBUG_INIT_ + #endif + + #ifdef DEBUG_INIT_ std::cout << "[INFO] Governors available on the system: " << "\n" << std::flush; -#ifdef ALLSCALE_HAVE_CPUFREQ std::vector temp_governors = hardware_reconf::get_governors(0); for (std::vector::const_iterator i = temp_governors.begin(); i != temp_governors.end(); ++i) std::cout << "[INFO]\t" << *i << "\n" << std::flush; -#endif std::cout << "\n" << 
std::flush; -#endif -#ifdef DEBUG_INIT_ std::cout << "Server Processor Available Frequencies (size = " << cpu_freqs.size() << ")"; for (auto &ind : cpu_freqs) { std::cout << ind << " "; } std::cout << "\n" << std::flush; -#endif + #endif auto min_max_freqs = std::minmax_element(cpu_freqs.begin(), cpu_freqs.end()); min_freq = *min_max_freqs.first; max_freq = *min_max_freqs.second; - -#ifdef DEBUG_INIT_ - std::cout << "Min freq: " << min_freq << ", Max freq: " << max_freq << "\n" - << std::flush; -#endif // TODO: verify that nbpus == all pus of the system, not just the online // ones size_t nbpus = topo_->get_number_of_pus(); -#ifdef DEBUG_INIT_ + + #ifdef DEBUG_INIT_ + std::cout << "Min freq: " << min_freq << ", Max freq: " << max_freq << "\n" + << std::flush; std::cout << "nbpus known to topo_: " << nbpus << "\n" << std::flush; -#endif + #endif -#ifdef ALLSCALE_HAVE_CPUFREQ hardware_reconf::make_cpus_online(0, nbpus); hardware_reconf::topo_init(); // We have to set CPU governors to userpace in order to change frequencies @@ -509,13 +475,12 @@ void scheduler::initialize_cpu_frequencies() { topo = hardware_reconf::read_hw_topology(); // first reinitialize to a normal setup - for (unsigned int cpu_id = 0; cpu_id < topo.num_logical_cores; cpu_id++) { + for (unsigned int cpu_id = 0; cpu_id < topo.num_logical_cores; cpu_id++){ hardware_reconf::set_freq_policy(cpu_id, policy); -#ifdef DEBUG_INIT_ - std::cout << "cpu_id " << cpu_id << " back to on-demand. ret= " << res - << "\n" - << std::flush; -#endif + #ifdef DEBUG_INIT_ + std::cout << "cpu_id " << cpu_id << " back to on-demand. 
ret= " + << res << std::endl; + #endif } governor = "userspace"; @@ -523,8 +488,10 @@ void scheduler::initialize_cpu_frequencies() { policy.min = min_freq; policy.max = max_freq; - for (unsigned int cpu_id = 0; cpu_id < topo.num_logical_cores; - cpu_id += topo.num_hw_threads) { + for (unsigned int cpu_id = 0; + cpu_id < topo.num_logical_cores; + cpu_id += topo.num_hw_threads) + { int res = hardware_reconf::set_freq_policy(cpu_id, policy); if (res) { HPX_THROW_EXCEPTION(hpx::bad_request, "scheduler::init", @@ -533,34 +500,29 @@ void scheduler::initialize_cpu_frequencies() { return; } -#ifdef DEBUG_INIT_ + #ifdef DEBUG_INIT_ std::cout << "cpu_id " << cpu_id << " initial freq policy setting. ret= " << res << "\n" << std::flush; -#endif + #endif } -#endif - // Set frequency of all threads to max when we start - { - // set freq to all PUs used by allscale - for (std::size_t i = 0; i != thread_pools_.size(); ++i) { - std::size_t thread_count = thread_pools_[i]->get_os_thread_count(); - for (std::size_t j = 0; j < thread_count; j++) { - std::size_t pu_num = - rp_->get_pu_num(j + thread_pools_[i]->get_thread_offset()); + // set freq to all PUs used by allscale + for (std::size_t i = 0; i != thread_pools_.size(); ++i) { + std::size_t thread_count = thread_pools_[i]->get_os_thread_count(); + for (std::size_t j = 0; j < thread_count; j++) { + std::size_t pu_num = + rp_->get_pu_num(j + thread_pools_[i]->get_thread_offset()); -#ifdef ALLSCALE_HAVE_CPUFREQ - if (!cpufreq_cpu_exists(pu_num)) { - hardware_reconf::set_frequency(pu_num, 1, cpu_freqs[0]); -#ifdef DEBUG_INIT_ - std::cout << "Setting cpu " << pu_num << " to freq " << cpu_freqs[0] - << ", (ret= " << res << ")\n" - << std::flush; -#endif - } -#endif + + if (!cpufreq_cpu_exists(pu_num)) { + hardware_reconf::set_frequency(pu_num, 1, cpu_freqs[0]); + #ifdef DEBUG_INIT_ + std::cout << "Setting cpu " << pu_num << " to freq " << cpu_freqs[0] + << ", (ret= " << res << ")\n" + << std::flush; + #endif } } } @@ -571,35 +533,31 @@ 
void scheduler::initialize_cpu_frequencies() { std::cout << "topo.num_logical_cores: " << topo.num_logical_cores << " topo.num_hw_threads" << topo.num_hw_threads << "\n" << std::flush; - { - // check status of Pus frequency -#ifdef ALLSCALE_HAVE_CPUFREQ - for (std::size_t i = 0; i != thread_pools_.size(); ++i) { - unsigned long hardware_freq = 0; - std::size_t thread_count = thread_pools_[i]->get_os_thread_count(); - for (std::size_t j = 0; j < thread_count; j++) { - std::size_t pu_num = - rp_->get_pu_num(j + thread_pools_[i]->get_thread_offset()); - - if (!cpufreq_cpu_exists(pu_num)) { - do { - hardware_freq = hardware_reconf::get_hardware_freq(pu_num); -#ifdef DEBUG_INIT_ - std::cout << "current freq on cpu " << pu_num << " is " - << hardware_freq << " (target freq is " << cpu_freqs[0] - << " )\n" - << std::flush; - -#endif + // check status of Pus frequency + + for (std::size_t i = 0; i != thread_pools_.size(); ++i) { + unsigned long hardware_freq = 0; + std::size_t thread_count = thread_pools_[i]->get_os_thread_count(); + for (std::size_t j = 0; j < thread_count; j++) { + std::size_t pu_num = + rp_->get_pu_num(j + thread_pools_[i]->get_thread_offset()); + + if (!cpufreq_cpu_exists(pu_num)) { + do { + hardware_freq = hardware_reconf::get_hardware_freq(pu_num); + #ifdef DEBUG_INIT_ + std::cout << "current freq on cpu " << pu_num << " is " + << hardware_freq << " (target freq is " << cpu_freqs[0] + << " )\n" + << std::flush; + #endif - } while (hardware_freq != cpu_freqs[0]); - } + } while (hardware_freq != cpu_freqs[0]); } } -#endif } -#ifdef ALLSCALE_USE_CORE_OFFLINING + #ifdef ALLSCALE_USE_CORE_OFFLINING // offline unused cpus for (unsigned int cpu_id = 0; cpu_id < topo.num_logical_cores; cpu_id += topo.num_hw_threads) { @@ -612,25 +570,23 @@ void scheduler::initialize_cpu_frequencies() { } if (!found_it) { -#ifdef DEBUG_INIT_ + #ifdef DEBUG_INIT_ std::cout << " setting cpu_id " << cpu_id << " offline \n" << std::flush; -#endif + #endif -#ifdef 
ALLSCALE_HAVE_CPUFREQ hardware_reconf::make_cpus_offline(cpu_id, cpu_id + topo.num_hw_threads); -#endif } } -#endif - + #endif +} #else - // should we really abort or should we reset energy to 1 ? - HPX_THROW_EXCEPTION( - hpx::bad_request, "scheduler::init", - "Requesting energy objective without having compiled with cpufreq"); -#endif +void scheduler::initialize_cpu_frequencies() +{ + cpu_freqs.clear(); + // VV: Bogus frequency + cpu_freqs.push_back(1000*1024); } - +#endif /** * @@ -667,7 +623,6 @@ void scheduler::optimize_locally(work_item const& work) #endif -#ifdef ALLSCALE_HAVE_CPUFREQ if (uselopt && !lopt_.isConverged()) { last_power_usage++; allscale::components::monitor *monitor_c = &allscale::monitor::get(); @@ -749,8 +704,7 @@ void scheduler::optimize_locally(work_item const& work) } } #endif -#endif - } + } } void scheduler::set_local_optimizer_weights(double time_weight, @@ -1258,9 +1212,9 @@ void scheduler::fix_allcores_frequencies(int frequency_idx){ // ones size_t nbpus = topo_->get_number_of_pus(); -#ifdef DEBUG_FREQSCALING_ + #ifdef DEBUG_FREQSCALING_ std::cout << "nbpus known to topo_: " << nbpus << "\n" << std::flush; -#endif + #endif hardware_reconf::make_cpus_online(0, nbpus); hardware_reconf::topo_init(); @@ -1281,68 +1235,69 @@ void scheduler::fix_allcores_frequencies(int frequency_idx){ "set cpu frequency"); return; } -#ifdef DEBUG_FREQSCALING_ + #ifdef DEBUG_FREQSCALING_ std::cout << "cpu_id " << cpu_id << " initial freq policy setting. 
ret= " << res << "\n" << std::flush; -#endif + #endif } - - { - // set freq of all cores used to min - for (std::size_t i = 0; i != thread_pools_.size(); ++i) { - std::size_t thread_count = thread_pools_[i]->get_os_thread_count(); - for (std::size_t j = 0; j < thread_count; j++) { - std::size_t pu_num = - rp_->get_pu_num(j + thread_pools_[i]->get_thread_offset()); - - if (!cpufreq_cpu_exists(pu_num)) { - //int res = hardware_reconf::set_frequency(pu_num, 1, cpu_freqs[cpu_freqs[.size()-1]]); - int res = hardware_reconf::set_frequency(pu_num, 1, cpu_freqs[frequency_idx]); - (void)res; -#if defined(MEASURE_MANUAL_) - fixed_frequency_ = cpu_freqs[frequency_idx]; -#endif -#ifdef DEBUG_FREQSCALING_ - //std::cout << "Setting cpu " << pu_num << " to freq " << cpu_freqs[cpu_freqs.size()-1] - std::cout << "Setting cpu " << pu_num << " to freq " << cpu_freqs[frequency_idx] - << ", (ret= " << res << ")\n" - << std::flush; -#endif - } + // set freq of all cores used to min + for (std::size_t i = 0; i != thread_pools_.size(); ++i) { + std::size_t thread_count = thread_pools_[i]->get_os_thread_count(); + for (std::size_t j = 0; j < thread_count; j++) { + std::size_t pu_num = + rp_->get_pu_num(j + thread_pools_[i]->get_thread_offset()); + + if (!cpufreq_cpu_exists(pu_num)) { + //int res = hardware_reconf::set_frequency(pu_num, 1, cpu_freqs[cpu_freqs[.size()-1]]); + int res = hardware_reconf::set_frequency(pu_num, 1, cpu_freqs[frequency_idx]); + (void)res; + #if defined(MEASURE_MANUAL_) + fixed_frequency_ = cpu_freqs[frequency_idx]; + #endif + #ifdef DEBUG_FREQSCALING_ + //std::cout << "Setting cpu " << pu_num << " to freq " << cpu_freqs[cpu_freqs.size()-1] + std::cout << "Setting cpu " << pu_num << " to freq " << cpu_freqs[frequency_idx] + << ", (ret= " << res << ")\n" + << std::flush; + #endif } } } - { - // check status of Pus frequency - for (std::size_t i = 0; i != thread_pools_.size(); ++i) { - unsigned long hardware_freq = 0; - std::size_t thread_count = 
thread_pools_[i]->get_os_thread_count(); - for (std::size_t j = 0; j < thread_count; j++) { - std::size_t pu_num = - rp_->get_pu_num(j + thread_pools_[i]->get_thread_offset()); - if (!cpufreq_cpu_exists(pu_num)) { - do { - hardware_freq = hardware_reconf::get_hardware_freq(pu_num); -#ifdef DEBUG_FREQSCALING_ - std::cout << "current freq on cpu " << pu_num << " is " - //<< hardware_freq << " (target freq is " << cpu_freqs[cpu_freqs.size()-1] - << hardware_freq << " (target freq is " << cpu_freqs[frequency_idx] - << " )\n" - << std::flush; + // check status of Pus frequency + for (std::size_t i = 0; i != thread_pools_.size(); ++i) { + unsigned long hardware_freq = 0; + std::size_t thread_count = thread_pools_[i]->get_os_thread_count(); + for (std::size_t j = 0; j < thread_count; j++) { + std::size_t pu_num = + rp_->get_pu_num(j + thread_pools_[i]->get_thread_offset()); -#endif + if (!cpufreq_cpu_exists(pu_num)) { + do { + hardware_freq = hardware_reconf::get_hardware_freq(pu_num); + #ifdef DEBUG_FREQSCALING_ + std::cout << "current freq on cpu " << pu_num << " is " + //<< hardware_freq << " (target freq is " << cpu_freqs[cpu_freqs.size()-1] + << hardware_freq << " (target freq is " << cpu_freqs[frequency_idx] + << " )\n" - //} while (hardware_freq != cpu_freqs[cpu_freqs.size()-1]); - } while (hardware_freq != cpu_freqs[frequency_idx]); - } + << std::flush; + #endif + //} while (hardware_freq != cpu_freqs[cpu_freqs.size()-1]); + } while (hardware_freq != cpu_freqs[frequency_idx]); } } } + +} +#else +void scheduler::fix_allcores_frequencies(int frequency_idx) +{ + // VV: This is a stub } #endif @@ -1404,44 +1359,7 @@ void scheduler::stop() { ++pool_idx; } } - - /* - - if (energy_requested) { -#if defined(ALLSCALE_HAVE_CPUFREQ) - - for (int cpu_id = 0; cpu_id < topo.num_logical_cores; - cpu_id += topo.num_hw_threads) { - bool found_it = false; - for (std::size_t i = 0; i != thread_pools_.size(); i++) { - if (hpx::threads::test(initial_masks_[i], cpu_id)) - found_it = 
true; - } - - if (!found_it) { -#ifdef DEBUG_ - std::cout << " setting cpu_id " << cpu_id << " back online \n" - << std::flush; -#endif - - hardware_reconf::make_cpus_online(cpu_id, cpu_id + topo.num_hw_threads); - } - } - - std::string governor = "ondemand"; - policy.governor = const_cast(governor.c_str()); - std::cout << "Set CPU governors back to " << governor << std::endl; - for (int cpu_id = 0; cpu_id < topo.num_logical_cores; - cpu_id += topo.num_hw_threads) - int res = hardware_reconf::set_freq_policy(cpu_id, policy); -#endif - } - */ - stopped_ = true; - // work_queue_cv_.notify_all(); - // std::cout << "rank(" << rank_ << "): scheduled " << count_ << "\n"; - /* Output all measured metrics */ #ifdef DEBUG_MULTIOBJECTIVE_ @@ -1454,14 +1372,13 @@ void scheduler::stop() { last_measure_threads = timestamp_now; update_active_osthreads(active_threads, dt_threads); -#ifdef ALLSCALE_HAVE_CPUFREQ - allscale::components::monitor *monitor_c = &allscale::monitor::get(); + allscale::components::monitor *monitor_c = &allscale::monitor::get(); auto measurement = monitor_c->get_current_power(); if ( measurement <= 10000 ) { update_power_consumption(measurement, dt_power); } -#endif + if ( meas_active_threads_count == 0 ) meas_active_threads_count = 1; if ( meas_power_count == 0 ) @@ -1469,7 +1386,6 @@ void scheduler::stop() { std::cout << "\n****************************************************\n" << std::flush; std::cout << "Measured Metrics of Application Execution:\n" - << "\tTotal number of tasks scheduled locally (#taskslocal) = " << nr_tasks_scheduled << std::endl @@ -1502,5 +1418,6 @@ void scheduler::stop() { #endif } -} -} + +} // components +} // allscale diff --git a/src/dashboard.cpp b/src/dashboard.cpp index d02f98b..d45c8e0 100644 --- a/src/dashboard.cpp +++ b/src/dashboard.cpp @@ -60,7 +60,7 @@ namespace allscale { namespace dashboard state.productive_cycles_per_second = float(state.cur_frequency) * (1.f - state.idle_rate); // freq to Hz -#if 
defined(ALLSCALE_HAVE_CPUFREQ) || defined(ALTERNATIVE_SCORE) +#if defined(ALTERNATIVE_SCORE) state.speed = monitor_c->get_avg_time_last_iterations(100); state.efficiency = active_cores; #else @@ -71,8 +71,12 @@ namespace allscale { namespace dashboard #if defined(POWER_ESTIMATE) || defined(ALLSCALE_HAVE_CPUFREQ) state.cur_power = monitor_c->get_current_power(); state.max_power = monitor_c->get_max_power(); - state.power = state.cur_power / state.max_power; +#else + state.max_power = 1.0; + state.cur_power = 1.0; #endif + state.power = state.cur_power / state.max_power; + return state; } }} @@ -171,7 +175,7 @@ namespace allscale { namespace dashboard float system_state::score() const { -#if defined(ALLSCALE_HAVE_CPUFREQ) || defined(ALTERNATIVE_SCORE) +#if defined(ALTERNATIVE_SCORE) return std::exp(speed * speed_exponent) * std::exp(efficiency * efficiency_exponent ) * std::exp(power * power_exponent); @@ -509,7 +513,7 @@ namespace allscale { namespace dashboard state.speed = total_speed / client.localities_.size(); // state.speed = std::pow(total_speed, 1.f/client.localities_.size()); -#if defined(ALLSCALE_HAVE_CPUFREQ) || defined(ALTERNATIVE_SCORE) +#if defined(ALTERNATIVE_SCORE) // VV: This is the number of active threads state.efficiency = total_efficiency; #else From d8346fa83128c3bc215f16c79a4271bceb3364ce Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Tue, 20 Nov 2018 14:07:00 +0000 Subject: [PATCH 22/37] Added better scaling functionality to NMD --- allscale/components/localoptimizer.hpp | 23 ++----- allscale/components/nmsimplex_bbincr.hpp | 12 +++- allscale/optimizer.hpp | 8 ++- src/components/localoptimizer.cpp | 46 ++++++++++++- src/components/nmsimplex_bbincr.cpp | 82 ++++++++++++++++++------ src/optimizer.cpp | 26 ++++++-- 6 files changed, 151 insertions(+), 46 deletions(-) diff --git a/allscale/components/localoptimizer.hpp b/allscale/components/localoptimizer.hpp index 1a04e9d..55785fa 100644 --- a/allscale/components/localoptimizer.hpp +++ 
b/allscale/components/localoptimizer.hpp @@ -49,23 +49,7 @@ struct actuation struct localoptimizer { - localoptimizer() - : pending_threads(0.), - pending_energy(0.), - pending_time(0.), - pending_num_times(0.), - mo_initialized(false), - frequency_param_(0), - converged_(false), - convergence_threshold_(0.005), - time_weight(0.0), - energy_weight(0.0), - resource_weight(0.0), - nmd(0.005) - { - if (optmethod_ == random) - srand(std::time(NULL)); - } + localoptimizer(); bool isConverged(); double evaluate_score(const double objectives[]); void setPolicy(searchPolicy pol) @@ -98,11 +82,12 @@ struct localoptimizer *resource_weight = this->resource_weight; } + void set_objectives_scale(const double objectives_scale[3]); + std::size_t getCurrentThreads() { return threads_param_; } void setCurrentThreads(std::size_t threads) { threads_param_ = threads; } - unsigned int getCurrentFrequencyIdx() { return frequency_param_; @@ -233,6 +218,8 @@ struct localoptimizer /* set to true if local optimizer has converged over all objectives */ bool converged_; + + double objectives_scale[3]; }; } // namespace components } // namespace allscale diff --git a/allscale/components/nmsimplex_bbincr.hpp b/allscale/components/nmsimplex_bbincr.hpp index 81704c3..8ad4422 100644 --- a/allscale/components/nmsimplex_bbincr.hpp +++ b/allscale/components/nmsimplex_bbincr.hpp @@ -108,6 +108,8 @@ class NelderMead void print_initial_simplex(); void print_iteration(); + void set_scale(const double scale[NMD_NUM_OBJECTIVES]); + double *getMinVertices() { return v[vs]; @@ -132,9 +134,12 @@ class NelderMead optstepresult step(const double objectives[], double knob1, double knob2); + void invalidate_cache(); + void reevaluate_scores(); + private: int warming_up_step; - + bool should_invalidate_cache, should_reevaluate_scores; double max_power_, max_time_; // VV: Utility to make sure that we generate new values and not something that already @@ -148,6 +153,9 @@ class NelderMead //VV: objective_type: { : 
optstepresult } MapCache_t cache_; + void do_invalidate_cache(); + void do_reevaluate_scores(); + optstepresult do_step_start(); optstepresult do_step_reflect(const double objectives[], double knob1, double knob2); @@ -170,7 +178,7 @@ class NelderMead bool convergence_reevaluating; int initial_configurations[NMD_NUM_KNOBS+1][NMD_NUM_KNOBS]; - + double scale[NMD_NUM_OBJECTIVES]; /* vertex with smallest value */ int vs; diff --git a/allscale/optimizer.hpp b/allscale/optimizer.hpp index 4452bea..b2a1ce8 100644 --- a/allscale/optimizer.hpp +++ b/allscale/optimizer.hpp @@ -99,7 +99,11 @@ namespace allscale { , threads_min(other.threads_min) , threads_max(other.threads_max) , previous_num_nodes(other.previous_num_nodes) - {} + { + objectives_scale[0] = other.objectives_scale[0]; + objectives_scale[1] = other.objectives_scale[1]; + objectives_scale[2] = other.objectives_scale[2]; + } bool active() const { @@ -141,6 +145,8 @@ namespace allscale { components::internode_optimizer_t o_ino; components::NelderMead nmd; + + double objectives_scale[3]; }; } diff --git a/src/components/localoptimizer.cpp b/src/components/localoptimizer.cpp index 6676738..ae026a7 100644 --- a/src/components/localoptimizer.cpp +++ b/src/components/localoptimizer.cpp @@ -25,6 +25,32 @@ namespace allscale namespace components { +localoptimizer::localoptimizer() + + : pending_threads(0.), + pending_energy(0.), + pending_time(0.), + pending_num_times(0.), + mo_initialized(false), + frequency_param_(0), + converged_(false), + convergence_threshold_(0.005), + time_weight(0.0), + energy_weight(0.0), + resource_weight(0.0), + nmd(0.005) + { + if (optmethod_ == random) + srand(std::time(NULL)); + + // VV: Start with 500ms as the guestimation of max iteration time + objectives_scale[0] = 0.5; + objectives_scale[1] = 1.0; + objectives_scale[2] = 1.0; + + nmd.set_scale(objectives_scale); + } + double localoptimizer::evaluate_score(const double objectives[]) { if ( mo_initialized ) { @@ -120,6 +146,7 @@ void 
localoptimizer::setmaxthreads(std::size_t threads) { max_threads_=threads; threads_param_=threads; + #if 0 double threads_tick = threads / 5.; @@ -157,6 +184,8 @@ void localoptimizer::initialize_nmd(bool from_scratch) #endif const double opt_weights[] = { time_weight, energy_weight, resource_weight }; + nmd.set_scale(objectives_scale); + if( from_scratch == false ){ double prev_simplex[NMD_NUM_KNOBS+1][NMD_NUM_KNOBS]; @@ -178,16 +207,31 @@ void localoptimizer::initialize_nmd(bool from_scratch) converged_ = false; } +void localoptimizer::set_objectives_scale(const double objectives_scale[3]) +{ + for (auto i=0ul; iobjectives_scale[i] = objectives_scale[i]; + + nmd.set_scale(objectives_scale); +} + void localoptimizer::measureObjective(double iter_time, double power, double threads) { + // VV: iter_time has no bound, threads has bound @max_threads_ + // and power 1.0 + std::cout << "Measuring objective: " << iter_time << " " << power << " " << threads << std::endl; + if ( objectives_scale[0] < iter_time ) { + objectives_scale[0] = iter_time * 2.0; + set_objectives_scale(objectives_scale); + } pending_time += iter_time; pending_energy += power; - pending_threads += threads; + pending_threads += threads / max_threads_; pending_num_times++; } diff --git a/src/components/nmsimplex_bbincr.cpp b/src/components/nmsimplex_bbincr.cpp index 7fb76b5..ee49ae1 100644 --- a/src/components/nmsimplex_bbincr.cpp +++ b/src/components/nmsimplex_bbincr.cpp @@ -33,8 +33,6 @@ NelderMead::NelderMead(const NelderMead &other) { EPSILON = other.EPSILON; state_ = other.state_; - max_power_ = other.max_power_; - max_time_ = other.max_time_; cache_.insert(other.cache_.begin(), other.cache_.end()); warming_up_step = other.warming_up_step; @@ -54,8 +52,10 @@ NelderMead::NelderMead(const NelderMead &other) vm[i] = other.vm[i]; } - for ( auto i=0; i NelderMead::explore_next_extra(double *extra, int level, @@ -301,19 +300,52 @@ bool NelderMead::cache_update(int threads, int freq_idx, return false; } 
+void NelderMead::invalidate_cache() +{ + should_invalidate_cache = true; +} + +void NelderMead::reevaluate_scores() +{ + should_reevaluate_scores = true; +} + +void NelderMead::do_invalidate_cache() +{ + cache_.clear(); + should_invalidate_cache = false; +} + +void NelderMead::do_reevaluate_scores() +{ + auto timestamp_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + + for (auto i=0ul; isecond.objectives, opt_weights); + } + } + + should_reevaluate_scores = false; +} + +void NelderMead::set_scale(const double scale[NMD_NUM_OBJECTIVES]) +{ + for ( auto i=0ul; iscale[i] = scale[i]; + + reevaluate_scores(); +} + double NelderMead::evaluate_score(const double objectives[], const double *weights) { double score; // VV: [time, energy/power, resources] - double scale[] = {1.0, 1.0, 1.0}; - // max_time_ = max_time_ > objectives[0] ? max_time_ : objectives[0]; - // max_power_ = max_power_ > objectives[2] ? max_power_ : objectives[2]; - - scale[0] = max_time_; - scale[1] = max_power_; - scale[2] = (double)constraint_max[0]; - if (weights == nullptr) weights = opt_weights; @@ -476,8 +508,9 @@ void NelderMead::initialize_simplex(const double weights[3], void NelderMead::print_initial_simplex() { int i, j; - std::cout << "[NelderMead DEBUG] Initial Values\n"; - + std::cout << "[NelderMead DEBUG] Initial Values (Order indices:" + << vs << ", " << vh << ", " << vg << ")" << std::endl; + for (j = 0; j < NMD_NUM_KNOBS + 1; j++) { @@ -1005,15 +1038,19 @@ optstepresult NelderMead::step(const double objectives[], optstepresult res; res.threads = 0; res.freq_idx = -1; + OUT_DEBUG( + auto score = evaluate_score(objectives, nullptr); + std::cout << "[NelderMead|DEBUG] Starting step with " << objectives[0] << " " << objectives[1] << " " - << objectives[2] << std::endl; + << objectives[2] << " score " << score << std::endl; ) std::size_t tested_combinations = cache_.size(); - + + #if 0 evaluate_score(objectives, nullptr); for (i=0; 
isecond.objectives, nullptr); } } + #endif + if ( should_invalidate_cache ) + do_invalidate_cache(); + + if ( should_reevaluate_scores ) + do_reevaluate_scores(); + switch (state_) { case warmup: diff --git a/src/optimizer.cpp b/src/optimizer.cpp index 924a9f3..b11ffbb 100644 --- a/src/optimizer.cpp +++ b/src/optimizer.cpp @@ -247,6 +247,11 @@ global_optimizer::global_optimizer() if ( c_threads_max ) threads_max = atoi(c_threads_max); } + + // VV: Guestimate that max iter time is 500 ms (will be refined over time) + objectives_scale[0] = 0.5; + objectives_scale[1] = 1.0; + objectives_scale[2] = 1.0; } void global_optimizer::signal_objective_changed() @@ -466,6 +471,7 @@ hpx::future global_optimizer::balance_ino_nmd(const std::vector > future_state) { std::lock_guard l(mtx_); + std::size_t num_active_nodes = std::count(active_nodes_.begin(), active_nodes_.end(), true); auto state = future_state.get(); float avg_time = 0; @@ -476,10 +482,15 @@ hpx::future global_optimizer::balance_ino_nmd(const std::vector 0.0) { avg_time += s.avg_time_; num_avg_time ++; } + avg_energy += s.energy_; avg_threads += s.active_cores_per_node_ / (float) s.cores_per_node_; @@ -491,14 +502,19 @@ hpx::future global_optimizer::balance_ino_nmd(const std::vector global_optimizer::balance_ino_nmd(const std::vector global_optimizer::balance_ino_nmd(const std::vector(); std::size_t cur_node = 0ul; From 392b748be9a19129ccd11a14402794efa469e051 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Tue, 20 Nov 2018 14:45:14 +0000 Subject: [PATCH 23/37] Working towards integrating global and local scheduler --- allscale/components/nmsimplex_bbincr.hpp | 3 +- allscale/components/scheduler.hpp | 2 + allscale/dashboard.hpp | 2 + allscale/optimizer.hpp | 7 +++- src/components/localoptimizer.cpp | 6 ++- src/components/nmsimplex_bbincr.cpp | 48 +++++++++++++++++++++++- src/components/scheduler_component.cpp | 6 ++- src/dashboard.cpp | 22 +++++++++-- src/optimizer.cpp | 18 ++++++++- src/scheduler.cpp | 
28 ++++++++++++++ 10 files changed, 131 insertions(+), 11 deletions(-) diff --git a/allscale/components/nmsimplex_bbincr.hpp b/allscale/components/nmsimplex_bbincr.hpp index 8ad4422..f630b23 100644 --- a/allscale/components/nmsimplex_bbincr.hpp +++ b/allscale/components/nmsimplex_bbincr.hpp @@ -100,10 +100,11 @@ class NelderMead const double initial_simplex[][NMD_NUM_KNOBS], const double constraint_min[NMD_NUM_KNOBS], const double constraint_max[NMD_NUM_KNOBS]); - + /* void initialize_simplex(const double weights[NMD_NUM_OBJECTIVES], const double constraint_min[NMD_NUM_KNOBS], const double constraint_max[NMD_NUM_KNOBS]); + */ void print_initial_simplex(); void print_iteration(); diff --git a/allscale/components/scheduler.hpp b/allscale/components/scheduler.hpp index 90d32e2..1ce0336 100644 --- a/allscale/components/scheduler.hpp +++ b/allscale/components/scheduler.hpp @@ -46,6 +46,8 @@ namespace allscale { namespace components { HPX_ASSERT(false); } + bool get_optimization_score(); + scheduler(std::uint64_t rank); void init(); diff --git a/allscale/dashboard.hpp b/allscale/dashboard.hpp index 385f4f1..eb77398 100644 --- a/allscale/dashboard.hpp +++ b/allscale/dashboard.hpp @@ -91,6 +91,8 @@ namespace allscale { namespace dashboard float power = 0; std::string to_json() const; + + float last_local_score; template void serialize(Archive& ar, unsigned); diff --git a/allscale/optimizer.hpp b/allscale/optimizer.hpp index b2a1ce8..a255497 100644 --- a/allscale/optimizer.hpp +++ b/allscale/optimizer.hpp @@ -99,6 +99,8 @@ namespace allscale { , threads_min(other.threads_min) , threads_max(other.threads_max) , previous_num_nodes(other.previous_num_nodes) + , use_lopt(other.use_lopt) + , last_optimization_score(other.last_optimization_score) { objectives_scale[0] = other.objectives_scale[0]; objectives_scale[1] = other.objectives_scale[1]; @@ -110,6 +112,8 @@ namespace allscale { return active_; } + double get_optimization_score(); + hpx::future balance(bool); hpx::future 
balance_ino(const std::vector &old_mapping); hpx::future balance_ino_nmd(const std::vector &old_mapping); @@ -145,8 +149,9 @@ namespace allscale { components::internode_optimizer_t o_ino; components::NelderMead nmd; - + double last_optimization_score; double objectives_scale[3]; + bool use_lopt; }; } diff --git a/src/components/localoptimizer.cpp b/src/components/localoptimizer.cpp index ae026a7..97f6dbd 100644 --- a/src/components/localoptimizer.cpp +++ b/src/components/localoptimizer.cpp @@ -172,7 +172,11 @@ void localoptimizer::setmaxthreads(std::size_t threads) void localoptimizer::initialize_nmd(bool from_scratch) { // VV: Place constraints to #threads and cpu_freq tunable knobs + int min_threads = 0.25 * max_threads_/((double)threads_dt); + if ( min_threads < 1 ) + min_threads = 1; + double constraint_min[] = {1, 0}; #if defined(ALLSCALE_HAVE_CPUFREQ) double constraint_max[] = {ceil(max_threads_/(double)threads_dt), @@ -225,7 +229,7 @@ void localoptimizer::measureObjective(double iter_time, double power, double thr << power << " " << threads << std::endl; if ( objectives_scale[0] < iter_time ) { - objectives_scale[0] = iter_time * 2.0; + objectives_scale[0] = iter_time * 1.1; set_objectives_scale(objectives_scale); } diff --git a/src/components/nmsimplex_bbincr.cpp b/src/components/nmsimplex_bbincr.cpp index ee49ae1..be46a41 100644 --- a/src/components/nmsimplex_bbincr.cpp +++ b/src/components/nmsimplex_bbincr.cpp @@ -331,6 +331,9 @@ void NelderMead::do_reevaluate_scores() } should_reevaluate_scores = false; + + sort_vertices(); + centroid(); } void NelderMead::set_scale(const double scale[NMD_NUM_OBJECTIVES]) @@ -377,7 +380,7 @@ void NelderMead::set_weights(const double weights[3]) << opt_weights[2] << std::endl; ) } - +#if 0 void NelderMead::initialize_simplex(const double weights[3], const double constraint_min[2], const double constraint_max[2]) @@ -391,6 +394,14 @@ void NelderMead::initialize_simplex(const double weights[3], this->constraint_max[i] = 
constraint_max[i]; } + OUT_DEBUG( + std::cout << "[NelderMead|Debug] Initialize contraints " << std::endl; + std::cout << constraint_min[0] + << ":" << constraint_max[0] << std::endl; + std::cout << constraint_min[1] + << ":" << constraint_max[1] << std::endl; + ) + set_weights(weights); state_ = warmup; itr = 0; @@ -424,6 +435,7 @@ void NelderMead::initialize_simplex(const double weights[3], ) } } +#endif /* FIXME: generalize */ void NelderMead::initialize_simplex(const double weights[3], @@ -440,6 +452,14 @@ void NelderMead::initialize_simplex(const double weights[3], this->constraint_max[i] = constraint_max[i]; } + OUT_DEBUG( + std::cout << "[NelderMead|Debug] Initialize contraints " << std::endl; + std::cout << constraint_min[0] + << ":" << constraint_max[0] << std::endl; + std::cout << constraint_min[1] + << ":" << constraint_max[1] << std::endl; + ) + set_weights(weights); state_ = warmup; itr = 0; @@ -580,6 +600,11 @@ void NelderMead::centroid() } vm[j] = cent / n; } + + OUT_DEBUG ( + std::cout << "[NelderMead|DEBUG] New Centroid: " + << vm[0] << " " << vm[1] << std::endl; + ) } void NelderMead::sort_vertices() @@ -1041,7 +1066,7 @@ optstepresult NelderMead::step(const double objectives[], OUT_DEBUG( auto score = evaluate_score(objectives, nullptr); - + std::cout << "[NelderMead|DEBUG] Starting step with " << objectives[0] << " " << objectives[1] << " " @@ -1253,6 +1278,25 @@ bool NelderMead::testConvergence(std::size_t tested_combinations) } if ( ret == true && convergence_reevaluating == true ) { + // VV: Now find the best result from cache + sort_vertices(); + + double best_knobs[NMD_NUM_KNOBS] = { v[vs][0], v[vs][1]}; + double best_score = f[vs]; + + for ( const auto & entry: cache_ ) { + auto cur_score = evaluate_score(entry.second.objectives, nullptr); + if ( cur_score < best_score) { + best_knobs[0] = entry.second.threads; + best_knobs[1] = entry.second.freq_idx; + + best_score = cur_score; + } + } + + v[vs][0] = best_knobs[0]; + v[vs][1] = 
best_knobs[1]; + f[vs] = best_score; return true; } else if ( ret == true ) { // VV: Do another final run to make sure that the objective scores still hold up diff --git a/src/components/scheduler_component.cpp b/src/components/scheduler_component.cpp index 76ed483..13cc0d7 100644 --- a/src/components/scheduler_component.cpp +++ b/src/components/scheduler_component.cpp @@ -51,7 +51,7 @@ scheduler::scheduler(std::uint64_t rank) last_power_usage(0), power_sum(0), power_count(0), - sampling_interval(10), + sampling_interval(3), current_avg_iter_time(0.0), multi_objectives(false), time_requested(false), @@ -661,6 +661,10 @@ void scheduler::optimize_locally(work_item const& work) power_sum=0; last_objective_score = lopt_.evaluate_score(last_objectives); + + auto power_dt = t_duration_now - last_measure_power; + update_power_consumption(power_sum/last_power_usage, power_dt); + last_measure_power = t_duration_now; } elapsedTimeMs = t_duration_now - last_optimization_timestamp_; diff --git a/src/dashboard.cpp b/src/dashboard.cpp index d45c8e0..ae5320b 100644 --- a/src/dashboard.cpp +++ b/src/dashboard.cpp @@ -57,7 +57,7 @@ namespace allscale { namespace dashboard state.max_frequency = monitor_c->get_max_freq(0); std::size_t active_cores = scheduler::get().get_active_threads(); - + state.last_local_score = scheduler::get().get_last_objective_score(); state.productive_cycles_per_second = float(state.cur_frequency) * (1.f - state.idle_rate); // freq to Hz #if defined(ALTERNATIVE_SCORE) @@ -76,7 +76,7 @@ namespace allscale { namespace dashboard state.cur_power = 1.0; #endif state.power = state.cur_power / state.max_power; - + return state; } }} @@ -110,6 +110,7 @@ namespace allscale { namespace dashboard ar & speed; ar & efficiency; ar & power; + ar & last_local_score; } std::string node_state::to_json() const @@ -225,7 +226,19 @@ namespace allscale { namespace dashboard const char* host_env = std::getenv(ENVVAR_DASHBOARD_IP); const char* port_env = 
std::getenv(ENVVAR_DASHBOARD_PORT); + char *const c_policy = std::getenv("ALLSCALE_SCHEDULING_POLICY"); + std::string input_objective_str = hpx::get_config_entry("allscale.objective", ""); + if (c_policy && strcasecmp(c_policy, "ino") == 0 ) + use_gopt = true; + else + use_gopt = false; + + if ( input_objective_str == "allscale" ) + use_lopt = true; + else + use_lopt = false; + std::string host; if (host_env) { @@ -315,11 +328,11 @@ namespace allscale { namespace dashboard buffers[0] = boost::asio::buffer(&m->msg_size, sizeof(std::uint64_t)); buffers[1] = boost::asio::buffer(m->json.data(), m->json.length()); -/* + /* std::cout << "Sending -----------------------------------\n"; std::cout << m->json << '\n'; std::cout << "Sending done ------------------------------\n"; -*/ + */ boost::asio::async_write(socket_, buffers, [f = std::move(f), m](boost::system::error_code ec, std::size_t /*length*/) { @@ -448,6 +461,7 @@ namespace allscale { namespace dashboard std::vector localities_; std::uint64_t time = 0; bool enabled_; + double use_gopt, use_lopt; }; dashboard_client& dashboard_client::get() diff --git a/src/optimizer.cpp b/src/optimizer.cpp index b11ffbb..1733351 100644 --- a/src/optimizer.cpp +++ b/src/optimizer.cpp @@ -200,9 +200,17 @@ global_optimizer::global_optimizer() f_resource_max(-1.0f), f_resource_leeway(-1.0f), nmd(0.005), nmd_initialized(0), - nodes_min(1), nodes_max(localities_.size()), threads_min(0), threads_max(0) + nodes_min(1), nodes_max(localities_.size()), threads_min(0), threads_max(0), + last_optimization_score(1.0) { char *const c_policy = std::getenv("ALLSCALE_SCHEDULING_POLICY"); + std::string input_objective_str = + hpx::get_config_entry("allscale.objective", ""); + + if ( input_objective_str == "allscale" ) + use_lopt = true; + else + use_lopt = false; previous_num_nodes = localities_.size(); if (c_policy && strcasecmp(c_policy, "ino") == 0 ) @@ -254,6 +262,11 @@ global_optimizer::global_optimizer() objectives_scale[2] = 1.0; } +double 
global_optimizer::get_optimization_score() +{ + return last_optimization_score; +} + void global_optimizer::signal_objective_changed() { const double new_weights[3] = { @@ -536,6 +549,9 @@ hpx::future global_optimizer::balance_ino_nmd(const std::vector l(optimizer_.mtx_); @@ -736,6 +747,23 @@ namespace allscale ); } + double get_last_objective_score() + { + std::vector scores; + + runtime::HierarchicalOverlayNetwork::forAllLocal( + [&](scheduler_service& sched) + { + scores.push_back(sched.get_last_objective_score()); + } + ); + + std::cout << "GET_LAST_OBJETIVE_SCORE (SCHED): got " << scores.size() << " values" << std::endl; + for (const auto &score: scores ) { + std::cout << score << std::endl; + } + } + void set_efficiency_exponent_broadcast(float exp) { runtime::HierarchicalOverlayNetwork::forAllLocal( From 25ba362779c7a92b11ee7905b7b5ad1cf50d0338 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Tue, 20 Nov 2018 17:33:24 +0000 Subject: [PATCH 24/37] Finalizing integration --- allscale/components/localoptimizer.hpp | 2 +- allscale/components/nmsimplex_bbincr.hpp | 7 +++ allscale/components/scheduler.hpp | 2 + allscale/scheduler.hpp | 1 + src/components/localoptimizer.cpp | 63 +++++++++++++++++++++--- src/components/nmsimplex_bbincr.cpp | 31 +++++++++--- src/components/scheduler_component.cpp | 28 +++++++++++ src/dashboard.cpp | 12 ----- src/optimizer.cpp | 7 ++- src/scheduler.cpp | 34 ++++++++++++- 10 files changed, 158 insertions(+), 29 deletions(-) diff --git a/allscale/components/localoptimizer.hpp b/allscale/components/localoptimizer.hpp index 55785fa..c5e6afc 100644 --- a/allscale/components/localoptimizer.hpp +++ b/allscale/components/localoptimizer.hpp @@ -64,7 +64,7 @@ struct localoptimizer } void initialize_nmd(bool from_scratch); searchPolicy getPolicy() { return optmethod_; } - + // VV: Modifying the objectives triggers restarting the optimizer void setobjectives(double time_weight, double energy_weight, diff --git 
a/allscale/components/nmsimplex_bbincr.hpp b/allscale/components/nmsimplex_bbincr.hpp index f630b23..974a0c1 100644 --- a/allscale/components/nmsimplex_bbincr.hpp +++ b/allscale/components/nmsimplex_bbincr.hpp @@ -138,6 +138,9 @@ class NelderMead void invalidate_cache(); void reevaluate_scores(); + void update_constraints(const double constraint_min[NMD_NUM_KNOBS], + const double constraint_max[NMD_NUM_KNOBS]); + private: int warming_up_step; bool should_invalidate_cache, should_reevaluate_scores; @@ -241,6 +244,10 @@ class NelderMead double constraint_max[2]; double opt_weights[NMD_NUM_OBJECTIVES]; + + double next_constraint_min[NMD_NUM_KNOBS], + next_constraint_max[NMD_NUM_KNOBS]; + bool should_update_constraints = false; }; } // namespace components diff --git a/allscale/components/scheduler.hpp b/allscale/components/scheduler.hpp index 1ce0336..9eb9fbf 100644 --- a/allscale/components/scheduler.hpp +++ b/allscale/components/scheduler.hpp @@ -79,6 +79,8 @@ namespace allscale { namespace components { double *energy_weight, double *resource_weight); + void update_max_threads(std::size_t max_threads); + double get_last_objective_score() { return last_objective_score; } diff --git a/allscale/scheduler.hpp b/allscale/scheduler.hpp index 8cf6006..f448ad5 100644 --- a/allscale/scheduler.hpp +++ b/allscale/scheduler.hpp @@ -48,6 +48,7 @@ namespace allscale static HPX_EXPORT void update_policy(task_times const& times, std::vector mask, std::uint64_t frequency); static void apply_new_mapping(const std::vector &new_mapping); + static void update_max_threads(std::size_t max_threads); static HPX_EXPORT void schedule(work_item&& work); static HPX_EXPORT components::scheduler* run(std::size_t rank); diff --git a/src/components/localoptimizer.cpp b/src/components/localoptimizer.cpp index 97f6dbd..1a313b1 100644 --- a/src/components/localoptimizer.cpp +++ b/src/components/localoptimizer.cpp @@ -88,7 +88,7 @@ void localoptimizer::printobjectives() { std::cout << 
"[LocalOptimizer|DEBUG] Weights=[time:" << time_weight << ", energy:" << energy_weight - << ", resource:" << resource_weight << "]" << std::endl; + << ", resource:" << resource_weight << "]" << std::endl << std::flush; } #endif @@ -166,25 +166,73 @@ void localoptimizer::setmaxthreads(std::size_t threads) #else threads_dt = 1.; #endif -} + + if ( mo_initialized ) { + if ( converged_ == false ) { + initialize_nmd(true); + } else { + double factor; + int min_freq = 0; + int max_freq = frequencies_param_allowed_.size() - 1; + + if ( time_weight >= energy_weight + resource_weight) { + factor = 0.5; + min_freq = frequencies_param_allowed_.size() / 4; + } + else { + factor = 0.25; + max_freq = max_freq / 2; + } + + int min_threads = factor * max_threads_/((double)threads_dt); + if ( min_threads < 1 ) + min_threads = 1; + + double constraint_min[] = {min_threads, min_freq}; + #if defined(ALLSCALE_HAVE_CPUFREQ) + double constraint_max[] = {ceil(max_threads_/(double)threads_dt), + (double)max_freq}; + #else + std::cout << "Allowed frequencies: " << frequencies_param_allowed_.size() << std::endl; + double constraint_max[] = {ceil(max_threads_/(double)threads_dt), + 0.0}; + #endif + + nmd.update_constraints(constraint_min, constraint_max); + } + } +} void localoptimizer::initialize_nmd(bool from_scratch) { // VV: Place constraints to #threads and cpu_freq tunable knobs - int min_threads = 0.25 * max_threads_/((double)threads_dt); + double factor; + int min_freq = 0; + int max_freq = frequencies_param_allowed_.size() - 1; + + if ( time_weight >= energy_weight + resource_weight) { + factor = 0.5; + min_freq = frequencies_param_allowed_.size() / 4; + } + else { + factor = 0.25; + max_freq = max_freq / 2; + } + + int min_threads = factor * max_threads_/((double)threads_dt); if ( min_threads < 1 ) min_threads = 1; - double constraint_min[] = {1, 0}; + double constraint_min[] = {min_threads, min_freq}; #if defined(ALLSCALE_HAVE_CPUFREQ) double constraint_max[] = 
{ceil(max_threads_/(double)threads_dt), - (double)frequencies_param_allowed_.size() - 1}; + (double)max_freq}; #else std::cout << "Allowed frequencies: " << frequencies_param_allowed_.size() << std::endl; double constraint_max[] = {ceil(max_threads_/(double)threads_dt), - 0.0}; + 0.0}; #endif const double opt_weights[] = { time_weight, energy_weight, resource_weight }; @@ -274,7 +322,7 @@ actuation localoptimizer::step(std::size_t active_threads) pending_threads}; reset_accumulated_measurements(); - if ( explore_knob_domain ){ + if ( converged_ == false ){ optstepresult nmd_res = nmd.step(latest_measurements, active_threads, frequency_param_); @@ -300,7 +348,6 @@ actuation localoptimizer::step(std::size_t active_threads) act.frequency_idx = minimization_point[1]; // VV: Stop searching for new knob_set - explore_knob_domain = false; converged_ = true; } else { // VV: Have not converged yet, keep exploring diff --git a/src/components/nmsimplex_bbincr.cpp b/src/components/nmsimplex_bbincr.cpp index be46a41..db01d43 100644 --- a/src/components/nmsimplex_bbincr.cpp +++ b/src/components/nmsimplex_bbincr.cpp @@ -64,6 +64,8 @@ NelderMead::NelderMead(const NelderMead &other) initial_configurations[i][j] = other.initial_configurations[i][j]; } } + + should_update_constraints = true; } //NelderMead::NelderMead(double (*objfunc)(double[]),double eps){ @@ -437,6 +439,17 @@ void NelderMead::initialize_simplex(const double weights[3], } #endif +void NelderMead::update_constraints(const double constraint_min[NMD_NUM_KNOBS], + const double constraint_max[NMD_NUM_KNOBS]) +{ + for (auto i=0; i(std::chrono::system_clock::now()).time_since_epoch().count(); - for (i = 0; i < NMD_NUM_KNOBS; i++) - { - this->constraint_min[i] = constraint_min[i]; - this->constraint_max[i] = constraint_max[i]; - } + update_constraints(constraint_min, constraint_max); OUT_DEBUG( std::cout << "[NelderMead|Debug] Initialize contraints " << std::endl; @@ -459,7 +468,7 @@ void 
NelderMead::initialize_simplex(const double weights[3], std::cout << constraint_min[1] << ":" << constraint_max[1] << std::endl; ) - + set_weights(weights); state_ = warmup; itr = 0; @@ -554,6 +563,7 @@ void NelderMead::print_initial_simplex() << e->second.objectives[2] << " " << std::endl; } + std::cout << std::flush; } } @@ -1073,6 +1083,15 @@ optstepresult NelderMead::step(const double objectives[], << objectives[2] << " score " << score << std::endl; ) + if ( should_update_constraints ) { + for (i=0; i max_threads ) + suspend_threads(active_threads - max_threads); + else if ( active_threads < max_threads ) + resume_threads(max_threads - active_threads); +} + void scheduler::set_local_optimizer_weights(double time_weight, double energy_weight, double resource_weight) diff --git a/src/dashboard.cpp b/src/dashboard.cpp index ae5320b..52a6890 100644 --- a/src/dashboard.cpp +++ b/src/dashboard.cpp @@ -226,18 +226,6 @@ namespace allscale { namespace dashboard const char* host_env = std::getenv(ENVVAR_DASHBOARD_IP); const char* port_env = std::getenv(ENVVAR_DASHBOARD_PORT); - char *const c_policy = std::getenv("ALLSCALE_SCHEDULING_POLICY"); - std::string input_objective_str = hpx::get_config_entry("allscale.objective", ""); - - if (c_policy && strcasecmp(c_policy, "ino") == 0 ) - use_gopt = true; - else - use_gopt = false; - - if ( input_objective_str == "allscale" ) - use_lopt = true; - else - use_lopt = false; std::string host; if (host_env) diff --git a/src/optimizer.cpp b/src/optimizer.cpp index 1733351..48312c2 100644 --- a/src/optimizer.cpp +++ b/src/optimizer.cpp @@ -100,11 +100,15 @@ namespace allscale scheduler::apply_new_mapping(new_mapping); } + void optimizer_update_max_threads(std::size_t max_threads) { + scheduler::update_max_threads(max_threads); + } } // namespace allscale HPX_PLAIN_DIRECT_ACTION(allscale::get_optimizer_state, allscale_get_optimizer_state_action); HPX_PLAIN_DIRECT_ACTION(allscale::optimizer_update_policy, 
allscale_optimizer_update_policy_action); HPX_PLAIN_DIRECT_ACTION(allscale::optimizer_update_policy_ino, allscale_optimizer_update_policy_action_ino); +HPX_PLAIN_DIRECT_ACTION(allscale::optimizer_update_max_threads, allscale_optimizer_update_max_threads); namespace allscale { @@ -757,7 +761,7 @@ hpx::future global_optimizer::balance_ino_nmd(const std::vector global_optimizer::balance_ino_nmd(const std::vector(localities_, new_mapping); } + hpx::lcos::broadcast_apply(localities_, new_threads_per_node); } }); } diff --git a/src/scheduler.cpp b/src/scheduler.cpp index fcb70c4..f1be5bc 100644 --- a/src/scheduler.cpp +++ b/src/scheduler.cpp @@ -304,6 +304,8 @@ namespace allscale , right_id_(std::move(other.right_id_)) , is_root_(other.is_root_) , optimizer_(std::move(other.optimizer_)) + , use_gopt(other.use_gopt) + , use_lopt(other.use_lopt) { HPX_ASSERT(false); } @@ -316,6 +318,20 @@ namespace allscale , parent_(here_.getParent()) , is_root_(here_ == root_) { + char *const c_policy = std::getenv("ALLSCALE_SCHEDULING_POLICY"); + std::string input_objective_str = hpx::get_config_entry("allscale.objective", ""); + + if (c_policy && strcasecmp(c_policy, "ino") == 0 ) + use_gopt = true; + else + use_gopt = false; + + if ( input_objective_str == "allscale" ) + use_lopt = true; + else + use_lopt = false; + + if (parent_.getRank() != scheduler::rank()) { parent_id_ = hpx::naming::get_id_from_locality_id( @@ -343,7 +359,7 @@ namespace allscale if (is_root_) run(); } - + std::string policy() { return policy_.policy(); @@ -356,6 +372,11 @@ namespace allscale tree_scheduling_policy::from_mapping(*policy_.policy_, new_mapping); } + void update_max_threads(std::size_t max_threads) { + auto &&local_scheduler = scheduler::get(); + local_scheduler.update_max_threads(max_threads); + } + void toggle_node(std::size_t locality_id) { { @@ -455,6 +476,7 @@ namespace allscale ); } + bool use_gopt, use_lopt; void set_policy(std::string policy) { @@ -844,6 +866,16 @@ namespace allscale 
monitor::get().set_cur_freq(freq); } + void scheduler::update_max_threads(std::size_t max_threads) + { + runtime::HierarchicalOverlayNetwork::forAllLocal( + [&](scheduler_service& sched) + { + sched.update_max_threads(max_threads); + } + ); + } + void scheduler::apply_new_mapping(const std::vector &new_mapping) { runtime::HierarchicalOverlayNetwork::forAllLocal( From 76afbda2ecf46adc1054c955ee68e2212084fc68 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Tue, 20 Nov 2018 18:27:51 +0000 Subject: [PATCH 25/37] Put an upper limit to how many times NMD is allowed to recurse due to measurement information that's cached --- allscale/components/nmsimplex_bbincr.hpp | 2 + src/components/nmsimplex_bbincr.cpp | 52 ++++++++++++++++-------- 2 files changed, 37 insertions(+), 17 deletions(-) diff --git a/allscale/components/nmsimplex_bbincr.hpp b/allscale/components/nmsimplex_bbincr.hpp index 974a0c1..11e5d09 100644 --- a/allscale/components/nmsimplex_bbincr.hpp +++ b/allscale/components/nmsimplex_bbincr.hpp @@ -248,6 +248,8 @@ class NelderMead double next_constraint_min[NMD_NUM_KNOBS], next_constraint_max[NMD_NUM_KNOBS]; bool should_update_constraints = false; + + int times_used_cached; }; } // namespace components diff --git a/src/components/nmsimplex_bbincr.cpp b/src/components/nmsimplex_bbincr.cpp index db01d43..4fe6de5 100644 --- a/src/components/nmsimplex_bbincr.cpp +++ b/src/components/nmsimplex_bbincr.cpp @@ -66,6 +66,7 @@ NelderMead::NelderMead(const NelderMead &other) } should_update_constraints = true; + times_used_cached = 0; } //NelderMead::NelderMead(double (*objfunc)(double[]),double eps){ @@ -551,13 +552,13 @@ void NelderMead::print_initial_simplex() const int freq_idx = (int) v[j][1]; auto e = cache_.find(std::make_pair(threads, freq_idx)); - std::cout << " Objective value = " << f[j]; + std::cout << " Objective value = "<< std::flush << f[j] << std::flush; if ( e == cache_.end() ) { - std::cout << " (not in cache)" << std::endl; + std::cout << " (not 
in cache)" << std::flush << std::endl; } else { - std::cout << " OBJs: " + std::cout << " OBJs: " << std::flush << e->second.objectives[0] << " " << e->second.objectives[1] << " " << e->second.objectives[2] << " " @@ -598,10 +599,10 @@ void NelderMead::centroid() int j, m; double cent; - for (j = 0; j <= n - 1; j++) + for (j = 0; j < NMD_NUM_KNOBS; j++) { cent = 0.0; - for (m = 0; m <= n; m++) + for (m = 0; m < NMD_NUM_KNOBS +1; m++) { if (m != vg) { @@ -611,6 +612,8 @@ void NelderMead::centroid() vm[j] = cent / n; } + my_constraints(vm); + OUT_DEBUG ( std::cout << "[NelderMead|DEBUG] New Centroid: " << vm[0] << " " << vm[1] << std::endl; @@ -621,7 +624,7 @@ void NelderMead::sort_vertices() { // VV: -1 is used for padding because the index to this map will never evaluate to 0 int map_to_index[] = { - -1, 0, 1, 0, 2, 0, 0, 0}; + 0, 0, 1, 0, 2, 0, 0, 0}; vg = vs = vh = 0; @@ -650,6 +653,7 @@ void NelderMead::sort_vertices() optstepresult NelderMead::do_step_start() { optstepresult res; + times_used_cached ++; OUT_DEBUG( std::cout << "[NelderMead DEBUG] State = Start" << std::endl; @@ -687,7 +691,8 @@ optstepresult NelderMead::do_step_start() auto entry = cache_.find(key); - if (entry != cache_.end()) + //VV: Fixme, remove recursion due to cache + if (entry != cache_.end() && times_used_cached < 15) { auto timestamp_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); auto dt = timestamp_now - entry->second._cache_timestamp; @@ -1073,6 +1078,7 @@ optstepresult NelderMead::step(const double objectives[], optstepresult res; res.threads = 0; res.freq_idx = -1; + times_used_cached = 0; OUT_DEBUG( auto score = evaluate_score(objectives, nullptr); @@ -1171,7 +1177,6 @@ optstepresult NelderMead::step(const double objectives[], std::cout << "[NelderMead|Warn] Unknown warmup step " << warming_up_step << std::endl; } - optstepresult res; res.objectives[0] = -1; res.objectives[1] = -1; res.objectives[2] = -1; @@ -1184,7 +1189,7 @@ 
optstepresult NelderMead::step(const double objectives[], v[warming_up_step][1] = res.freq_idx; warming_up_step++; - return res; + break; } break; case start: @@ -1209,16 +1214,29 @@ optstepresult NelderMead::step(const double objectives[], return res; } - res.converged = testConvergence(tested_combinations); - - if (res.converged == true) + if ( state_ != warmup ) { - res.threads = v[vs][0]; - res.freq_idx = v[vs][1]; - OUT_DEBUG( - std::cout << "[NelderMead|DEBUG] Converged to " << res.threads << " " << res.freq_idx << std::endl; - ) + res.converged = testConvergence(tested_combinations); + + if (res.converged == true) + { + res.threads = v[vs][0]; + res.freq_idx = v[vs][1]; + OUT_DEBUG( + std::cout << "[NelderMead|DEBUG] Converged to " << res.threads << " " << res.freq_idx << std::endl; + ) + } } + + if ( res.threads > constraint_max[0]) + res.threads = (int) constraint_max[0]; + else if ( res.threads < constraint_min[0]) + res.threads = (int) constraint_min[0]; + + if ( res.freq_idx > constraint_max[1]) + res.freq_idx = (int) constraint_max[1]; + else if ( res.freq_idx < constraint_min[1]) + res.freq_idx = (int) constraint_min[1]; std::cout << "Stop step with " << objectives[0] << " " From 546a6cfbe9b1d8d7934edbfae168f15f2de36149 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Wed, 21 Nov 2018 09:04:12 +0000 Subject: [PATCH 26/37] When re-evaluating scores re-pick the top NMD_NUM_KNOBS+1 configurations for the NelderMead algorithm --- src/components/nmsimplex_bbincr.cpp | 50 +++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/src/components/nmsimplex_bbincr.cpp b/src/components/nmsimplex_bbincr.cpp index 4fe6de5..6f2126a 100644 --- a/src/components/nmsimplex_bbincr.cpp +++ b/src/components/nmsimplex_bbincr.cpp @@ -9,7 +9,8 @@ * function with complex analytical evaluation) * */ - +#include +#include #include #include @@ -323,19 +324,48 @@ void NelderMead::do_reevaluate_scores() { auto timestamp_now = 
std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); - for (auto i=0ul; i fresh; + should_reevaluate_scores = false; - if ( entry != cache_.end() ) { - f[i] = evaluate_score(entry->second.objectives, opt_weights); - } + for ( const auto &entry: cache_ ) { + auto dt = timestamp_now - entry.second._cache_timestamp; + if ( dt <= entry.second._cache_expires_dt ) + fresh.push_back(entry.second); } - should_reevaluate_scores = false; + if ( fresh.size() >= NMD_NUM_KNOBS +1 ) { + std::sort(fresh.begin(), fresh.end(), + [this](const optstepresult &l, const optstepresult &r) mutable -> int { + return evaluate_score(l.objectives, nullptr) < + evaluate_score(r.objectives, nullptr); + }); + + for (auto i=0ul; isecond.objectives, opt_weights); + } + } + sort_vertices(); + } + OUT_DEBUG( + std::cout << "[NelderMead|DEBUG] Re-Evaluated all scores" << std::endl; + print_initial_simplex(); + ) centroid(); } From 56f3529dbcfe487a73224ed65e6848bd28b603d7 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Thu, 22 Nov 2018 10:33:25 +0000 Subject: [PATCH 27/37] Bugfix Faking a list of allowed cpufrequencies even when CPUFREQ is not present --- src/components/scheduler_component.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/components/scheduler_component.cpp b/src/components/scheduler_component.cpp index 44ec7fa..2b865bf 100644 --- a/src/components/scheduler_component.cpp +++ b/src/components/scheduler_component.cpp @@ -220,6 +220,13 @@ void scheduler::init() { allscale policy is the default */ std::string input_optpolicy_str = hpx::get_config_entry("allscale.policy", "none"); + if ( input_optpolicy_str == "none" ){ + char *c_optpolicy = std::getenv("ALLSCALE_LOCAL_OPTIMIZER"); + if ( c_optpolicy) + input_optpolicy_str = std::string(c_optpolicy); + } + + uselopt=false; #ifdef DEBUG_MULTIOBJECTIVE_ std::cout << "[Local Optimizer|INFO] Optimization Policy Active = " << input_optpolicy_str << 
std::endl; @@ -230,14 +237,7 @@ void scheduler::init() { lopt_.setPolicy(random); else if (input_optpolicy_str=="manual") lopt_.setPolicy(manual); - else if ( input_optpolicy_str == "none") { - char *c_optpolicy = std::getenv("ALLSCALE_LOCAL_OPTIMIZER"); - if ( c_optpolicy && strcmp(c_optpolicy, "allscale") == 0 ) { - lopt_.setPolicy(allscale); - uselopt=true; - } - } - else if ( input_optpolicy_str != "none" ) { + else if ( input_optpolicy_str != "none" ) { HPX_THROW_EXCEPTION(hpx::bad_request, "scheduler::init", "unknown allscale.policy"); } @@ -317,6 +317,7 @@ void scheduler::init() { << std::flush; #endif } else { + std::cout << "TRIED PARSING \"" << obj << "\"" << std::endl; HPX_THROW_EXCEPTION( hpx::bad_request, "scheduler::init", boost::str( @@ -415,6 +416,7 @@ void scheduler::init() { #else // VV: Max number of threads, and an arbitrary frequency index lopt_.reset(os_thread_count,0); + auto freq_temp = lopt_.setfrequencies({0}); #endif // VV: Set objectives after setting all constraints to From 6702140d7adb5fe752599710bfaacf0edb3dccf9 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Thu, 22 Nov 2018 13:56:35 +0000 Subject: [PATCH 28/37] Bugfix Setting initial configuration was not taking into account that the constraints will change in a future step --- allscale/components/localoptimizer.hpp | 5 +++-- src/components/localoptimizer.cpp | 29 +++++++++++++++++++++----- src/components/nmsimplex_bbincr.cpp | 26 +++++++++++++++-------- src/optimizer.cpp | 4 +++- 4 files changed, 47 insertions(+), 17 deletions(-) diff --git a/allscale/components/localoptimizer.hpp b/allscale/components/localoptimizer.hpp index c5e6afc..96d1f5f 100644 --- a/allscale/components/localoptimizer.hpp +++ b/allscale/components/localoptimizer.hpp @@ -35,7 +35,8 @@ enum searchPolicy { allscale, random, - manual + manual, + none }; @@ -187,7 +188,7 @@ struct localoptimizer NelderMead nmd; /* single objective optimization method used */ - searchPolicy optmethod_ = random; + 
searchPolicy optmethod_ = none; /* active optimization parameter - nr of OS threads active */ int threads_param_; diff --git a/src/components/localoptimizer.cpp b/src/components/localoptimizer.cpp index 1a313b1..7ea0f67 100644 --- a/src/components/localoptimizer.cpp +++ b/src/components/localoptimizer.cpp @@ -224,7 +224,8 @@ void localoptimizer::initialize_nmd(bool from_scratch) if ( min_threads < 1 ) min_threads = 1; - + int max_threads = max_threads_; + double constraint_min[] = {min_threads, min_freq}; #if defined(ALLSCALE_HAVE_CPUFREQ) double constraint_max[] = {ceil(max_threads_/(double)threads_dt), @@ -248,10 +249,28 @@ void localoptimizer::initialize_nmd(bool from_scratch) constraint_min, constraint_max); } else { - nmd.initialize_simplex(opt_weights, - nullptr, - constraint_min, - constraint_max); + if ( time_weight >= energy_weight + resource_weight ) { + double initial_simplex[3][2] = { + {min_threads, constraint_min[1]}, + {max_threads, constraint_max[1]}, + {(min_threads+max_threads)/2., constraint_max[1]} + }; + nmd.initialize_simplex(opt_weights, + initial_simplex, + constraint_min, + constraint_max); + } else { + double initial_simplex[3][2] = { + {min_threads, constraint_min[1]}, + {max_threads, constraint_min[1]}, + {(min_threads+max_threads)/2., constraint_max[1]} + }; + + nmd.initialize_simplex(opt_weights, + initial_simplex, + constraint_min, + constraint_max); + } } mo_initialized = true; diff --git a/src/components/nmsimplex_bbincr.cpp b/src/components/nmsimplex_bbincr.cpp index 6f2126a..850f439 100644 --- a/src/components/nmsimplex_bbincr.cpp +++ b/src/components/nmsimplex_bbincr.cpp @@ -227,11 +227,13 @@ void NelderMead::generate_new(F &gen) dir = logistics.second; #endif + /* OUT_DEBUG( std::cout << "[NelderMead|Debug] Rejecting " << new_set[0] << " " << new_set[1] << " will try offset " << extra[0] << " " << extra[1] << std::endl; ) + */ } else { break; } @@ -395,7 +397,7 @@ double NelderMead::evaluate_score(const double objectives[], 
const double *weigh #else score = 1.0; for ( auto i=0; i(std::chrono::system_clock::now()).time_since_epoch().count(); - update_constraints(constraint_min, constraint_max); + for (i=0; iconstraint_min[i] = constraint_min[i]; + this->constraint_max[i] = constraint_max[i]; + } OUT_DEBUG( std::cout << "[NelderMead|Debug] Initialize contraints " << std::endl; @@ -541,13 +547,6 @@ void NelderMead::initialize_simplex(const double weights[3], } } while (is_ok == 0); - - OUT_DEBUG( - std::cout << "[NelderMead|DEBUG] Random initial simplex [" << i << "]: "; - for ( j =0; j global_optimizer::balance_ino_nmd(const std::vector(localities_, new_mapping); } - hpx::lcos::broadcast_apply(localities_, new_threads_per_node); + + if ( threads_min != threads_max ) + hpx::lcos::broadcast_apply(localities_, new_threads_per_node); } }); } From 0d8e1f8b2c993563e36d66ee5ae006c1b3771254 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Thu, 22 Nov 2018 14:19:50 +0000 Subject: [PATCH 29/37] Modified initial simplex --- src/components/localoptimizer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/components/localoptimizer.cpp b/src/components/localoptimizer.cpp index 7ea0f67..d002112 100644 --- a/src/components/localoptimizer.cpp +++ b/src/components/localoptimizer.cpp @@ -252,7 +252,7 @@ void localoptimizer::initialize_nmd(bool from_scratch) if ( time_weight >= energy_weight + resource_weight ) { double initial_simplex[3][2] = { {min_threads, constraint_min[1]}, - {max_threads, constraint_max[1]}, + {max_threads/2.0, (constraint_min[1]+constraint_max[1])/2.0}, {(min_threads+max_threads)/2., constraint_max[1]} }; nmd.initialize_simplex(opt_weights, @@ -262,7 +262,7 @@ void localoptimizer::initialize_nmd(bool from_scratch) } else { double initial_simplex[3][2] = { {min_threads, constraint_min[1]}, - {max_threads, constraint_min[1]}, + {max_threads/2.0, (constraint_min[1]+constraint_max[1])/2.0}, {(min_threads+max_threads)/2., constraint_max[1]} }; From 
38d78f90264238e46caa238966215019c2d63546 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Thu, 22 Nov 2018 14:36:43 +0000 Subject: [PATCH 30/37] Final exploration will re-use even stale chache entries --- src/components/nmsimplex_bbincr.cpp | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/components/nmsimplex_bbincr.cpp b/src/components/nmsimplex_bbincr.cpp index 850f439..851156d 100644 --- a/src/components/nmsimplex_bbincr.cpp +++ b/src/components/nmsimplex_bbincr.cpp @@ -1382,14 +1382,36 @@ bool NelderMead::testConvergence(std::size_t tested_combinations) warming_up_step = 0; itr --; convergence_reevaluating = true; + std::vector fresh; + + for ( const auto &entry: cache_ ) { + fresh.push_back(entry.second); + } + cache_.clear(); + std::sort(fresh.begin(), fresh.end(), + [this](const optstepresult &l, const optstepresult &r) mutable -> int { + return evaluate_score(l.objectives, nullptr) < + evaluate_score(r.objectives, nullptr); + }); + + for (auto i=0ul; i Date: Sun, 25 Nov 2018 21:02:50 +0000 Subject: [PATCH 31/37] INO_NMD now toggles active_nodes_ plus Generic implementation of NMD --- allscale/components/nmd.hpp | 141 ++++++ src/components/nmd.cpp | 871 ++++++++++++++++++++++++++++++++++++ src/optimizer.cpp | 18 +- 3 files changed, 1027 insertions(+), 3 deletions(-) create mode 100644 allscale/components/nmd.hpp create mode 100644 src/components/nmd.cpp diff --git a/allscale/components/nmd.hpp b/allscale/components/nmd.hpp new file mode 100644 index 0000000..4f18ea2 --- /dev/null +++ b/allscale/components/nmd.hpp @@ -0,0 +1,141 @@ +/* +Nelder Mead implementation for arbitrary number of knobs and number of objectives. + +Developed explicitly for non-continuous search spaces. + +Important information +--------------------- + +This implementation uses a cache coupled with the exploration-heuristic that is explained +bellow to refrain from evaluating the same set of knobs multiple times. 
+ +If NMD proposes to explore a knob-set that has been recently evaluated (i.e. there's a +non stale entry in the cache) the heuristic will instead propose the closest point that is +enclosed within the N-dimensional (where N = num_knobs) space near the knob set that NMD +initially proposed. The N-dimensional space takes a form of a square, Cube, Hypercube for +N=2, 3, 4. Each edge may be at most @max_distance_long (see generate_unique) for more info. + +author: vasiliadis.vasilis@gmail.com +*/ +#ifndef ALLSCALE_NMD_HEADER +#include +#include +#include +#include + +namespace allscale { +namespace components { + +struct logistics { + std::vector objectives; + std::vector knobs; + + int64_t cache_ts, cache_dt; + + bool converged; +}; + +#define ALPHA 1.0 /* reflection coefficient */ +#define BETA 0.5 /* contraction coefficient */ +#define GAMMA 2.0 /* expansion coefficient */ +#define DELTA 0.5 /* shrinking coefficient */ + +class NmdGeneric { +public: + NmdGeneric(); + NmdGeneric(std::size_t num_knobs, std::size_t num_objectives, + double conv_threshold, int64_t cache_expire_dt_ms, + std::size_t max_iters); + NmdGeneric(const NmdGeneric& other); + + void initialize(std::size_t constraint_min[], std::size_t constraint_max[], + std::size_t *initial_config[], double weights[]); + + void ensure_profile_consistency(std::size_t expected[], const std::size_t observed[]) const; + + void set_constraints_now(std::size_t constraint_min[], std::size_t constraint_max[]); + + double score(const double measurements[]) const; + + std::pair, bool> get_next(const double measurements[], + std::size_t observed_knobs[]); + +// protected: + bool test_convergence(); + std::vector do_warmup(const double measurements[], + std::size_t observed_knobs[]); + std::vector do_reflect(const double measurements[], + std::size_t observed_knobs[]); + std::vector do_expand(const double measurements[], + std::size_t observed_knobs[]); + std::vector do_contract_in(const double measurements[], + 
std::size_t observed_knobs[]); + std::vector do_contract_out(const double measurements[], + std::size_t observed_knobs[]); + std::vector do_shrink(); + std::vector do_start(bool consult_cache); + + void sort_simplex(bool consult_cache=true); + void compute_centroid(); + + void generate_unique(std::size_t initial[], bool accept_stale, + const std::set > *extra) const; + std::size_t compute_max_combinations() const; + + template + void apply_constraint(T knobs[]) const + { + for (auto i=0ul; i (T) constraint_max[i] ) + knobs[i] = constraint_max[i]; + } + } + + //VV: Used to generate all possible combinations of +- + // from: https://stackoverflow.com/questions/4633584/ + template + bool next_binary(Iter begin, Iter end) const + { + while (begin != end) // we're not done yet + { + --end; + if ((*end & 1) == 0) // even number is treated as zero + { + ++*end; // increase to one + return true; // still more numbers to come + } + else // odd number is treated as one + { + --*end; // decrease to zero and loop + } + } + return false; // that was the last number + } + + enum estate {warmup, start, reflect, expand, contract_in, contract_out, shrink}; + estate current_state; + std::size_t warmup_step; + + double conv_threshold; + std::size_t num_knobs; + std::size_t num_objectives; + + double *scores; + std::size_t **simplex, **initial_config; + std::size_t *constraint_max, *constraint_min; + std::size_t *point_reflect, *point_contract, *point_expand, *centroid; + std::map< std::vector, logistics> cache; + int64_t cache_expire_dt_ms; + double *weights; + std::size_t times_reentered_start; + double score_reflect, score_contract, score_expand; + bool final_explore; + std::size_t iteration, max_iters; +}; + +} +} + +#endif \ No newline at end of file diff --git a/src/components/nmd.cpp b/src/components/nmd.cpp new file mode 100644 index 0000000..9bae1cf --- /dev/null +++ b/src/components/nmd.cpp @@ -0,0 +1,871 @@ +#include +#include +#include +#include +#include +#include 
+#include + + +// #define NMD_DEBUG_ +// #define NMD_INFO_ + +#ifdef NMD_DEBUG_ +#define OUT_DEBUG(X) X +#ifndef NMD_INFO_ + #define NMD_INFO_ +#endif +#else +#define OUT_DEBUG(X) {} +#endif + +#if defined(NMD_INFO_) +#define OUT_INFO(X) X +#else +#define OUT_INFO(X) {} +#endif + + +using namespace allscale::components; + +NmdGeneric::NmdGeneric() +: +current_state(warmup), warmup_step(0), +conv_threshold(0), num_knobs(0), num_objectives(0), +scores(nullptr), simplex(nullptr), initial_config(nullptr), +constraint_max(nullptr), constraint_min(nullptr), +point_reflect(nullptr), point_contract(nullptr), weights(nullptr) +{} + +NmdGeneric::NmdGeneric(std::size_t num_knobs, + std::size_t num_objectives, + double conv_threshold, + int64_t cache_expire_dt_ms, + std::size_t max_iters) +: conv_threshold(conv_threshold), num_knobs(num_knobs), +num_objectives(num_objectives), +cache_expire_dt_ms(cache_expire_dt_ms), +final_explore(false), +max_iters(max_iters) +{ + scores = new double [num_knobs+1]; + centroid = new std::size_t [num_knobs]; + simplex = new std::size_t* [num_knobs+1]; + initial_config = new std::size_t* [num_knobs+1]; + + for (auto i=0ul; i 1.0 ) { + ret = 1.0; + } + return 1.0 - ret; +} + +void NmdGeneric::initialize(std::size_t constraint_min[], std::size_t constraint_max[], + std::size_t *initial_config[], double weights[]) +{ + for (auto i=0ul; iweights[i] = weights[i]; + + set_constraints_now(constraint_min, constraint_max); + + iteration = 0; + + if ( initial_config == nullptr ) { + std::set > fake; + + for (auto i=0ul; iinitial_config[i][j] = std::rand() % width + constraint_min[j]; + } + + generate_unique(this->initial_config[i], true, &fake); + auto new_key = std::vector(); + new_key.assign(this->initial_config[i], this->initial_config[i]+num_knobs); + fake.insert(new_key); + } + } else { + for (auto i=0ul; iinitial_config[i][j] = initial_config[i][j]; + } + + current_state = warmup; + warmup_step = 0; + + OUT_INFO( + for (auto i=0ul; 
iinitial_config[i][j] << " "; + std::cout << std::endl; + } + ) + + final_explore = false; + times_reentered_start = 0; +} + +void NmdGeneric::set_constraints_now(std::size_t constraint_min[], + std::size_t constraint_max[]) +{ + for (auto i=0ul; iconstraint_max[i] = constraint_max[i]; + this->constraint_min[i] = constraint_min[i]; + } +} + +void NmdGeneric::generate_unique(std::size_t initial[], bool accept_stale=false, + const std::set > *extra=nullptr) const +{ + const auto ts_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + + auto explored = (std::size_t) std::count_if(cache.begin(), cache.end(), [ts_now, accept_stale](const auto &entry) { + auto dt = ts_now - entry.second.cache_ts; + return accept_stale || dt < entry.second.cache_dt; + }); + + auto max_comb = compute_max_combinations(); + + if ( max_comb > explored && max_comb - explored > 1 ) { + // VV: TODO Optimize check_novel(). Currently, large "max_distance" values + // may result in extreme overheads + const auto max_distance = 3ul; + int64_t temp[num_knobs]; + std::set< std::vector > candidates; + + auto check_novel = [this, &ts_now, &candidates, &accept_stale, &extra](int64_t knobs[]) mutable -> void { + apply_constraint(knobs); + + auto key = std::vector(); + + key.assign(knobs, knobs+num_knobs); + auto entry = cache.find(key); + if ( extra == nullptr || extra->find(key) == extra->end()) { + if ( entry == cache.end() ) { + candidates.insert(key); + } else { + auto dt = ts_now - entry->second.cache_ts; + if (accept_stale==false || + (dt >= entry->second.cache_dt && cache_expire_dt_ms > 0) ) { + candidates.insert(key); + } + } + } + }; + + auto counters = std::vector(num_knobs, 0ul); + + bool done = false; + + while ( done == false ) { + // VV: Generate all possible permutations + auto ops = std::string(num_knobs, '0'); + do{ + for ( auto j=0ul; j constraint_max[i] - constraint_min[i] +1) || + (counters[i] > max_distance) ) { + counters[i] = 0; + 
counters[i+1] += 1; + } + } + + if ( (counters[num_knobs-1] > + constraint_max[num_knobs-1] - constraint_min[num_knobs-1] +1) + || (counters[num_knobs-1] > max_distance)) + done = true; + } + + // std::cout << "Step " << candidates.size() << std::endl; + + std::vector< std::vector > sorted; + + sorted.assign(candidates.begin(), candidates.end()); + candidates.clear(); + + std::sort(sorted.begin(), sorted.end(), + [initial](const auto &e1, const auto &e2) mutable -> int { + int64_t t; + std::size_t d1=0ul, d2=0ul; + + for (auto i=0ul; i(); + const auto ts_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + + for ( auto i = 0ul; isecond; + p.cache_ts = ts_now; + p.cache_dt = cache_expire_dt_ms; + entry->second = p; + } + + OUT_DEBUG( + std::cout << "CACHE ENTRIES: "<(); + key.assign(point_reflect, point_reflect + num_knobs); + + auto entry = cache.find(key); + + current_state = reflect; + + if ( entry != cache.end() + && times_reentered_start++ < 5 + && iteration < max_iters ) { + auto ts_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + + if ( ts_now - entry->second.cache_ts < entry->second.cache_dt ) { + return do_reflect(entry->second.objectives.data(), entry->second.knobs.data()); + } + } + + return key; +} + +std::vector NmdGeneric::do_shrink() +{ + OUT_DEBUG( + std::cout << "[NMD|Dbg] INNER shrink" << std::endl; + ) + + std::set > fake; + std::vector key; + + for ( auto i=0ul; iinitial_config[i][j] << " "; + std::cout << std::endl; + } + ) + + return do_warmup({}, {}); +} + +std::vector NmdGeneric::do_contract_out(const double measurements[], + std::size_t observed_knobs[]) +{ + ensure_profile_consistency(point_contract, observed_knobs); + score_contract = score(measurements); + OUT_DEBUG( + std::cout << "[NMD|Dbg] INNER ContractOUT: "; + for (auto i=0ul; i(std::chrono::system_clock::now()).time_since_epoch().count(); + + logistics entry; + 
entry.knobs.assign(observed_knobs, observed_knobs+num_knobs); + entry.objectives.assign(measurements, measurements+num_objectives); + entry.cache_dt = cache_expire_dt_ms; + entry.cache_ts = ts_now; + + cache[entry.knobs] = entry; + + if ( score_contract <= score_reflect ){ + // VV: foc <= fr then replace v[n] with voc + + for (auto i=0ul; i NmdGeneric::do_contract_in(const double measurements[], + std::size_t observed_knobs[]) +{ + ensure_profile_consistency(point_contract, observed_knobs); + score_contract = score(measurements); + + OUT_DEBUG( + std::cout << "[NMD|Dbg] INNER ContractIN: "; + for (auto i=0ul; i(std::chrono::system_clock::now()).time_since_epoch().count(); + + logistics entry; + entry.knobs.assign(observed_knobs, observed_knobs+num_knobs); + entry.objectives.assign(measurements, measurements+num_objectives); + entry.cache_dt = cache_expire_dt_ms; + entry.cache_ts = ts_now; + + cache[entry.knobs] = entry; + + if ( score_contract < scores[num_knobs] ){ + // VV: fic < f[n] then replace v[n] with vic + + for (auto i=0ul; i NmdGeneric::do_expand(const double measurements[], + std::size_t observed_knobs[]) +{ + ensure_profile_consistency(point_expand, observed_knobs); + score_expand = score(measurements); + + OUT_DEBUG( + std::cout << "[NMD|Dbg] INNER Expand: "; + for (auto i=0ul; i(std::chrono::system_clock::now()).time_since_epoch().count(); + + logistics entry; + entry.knobs.assign(observed_knobs, observed_knobs+num_knobs); + entry.objectives.assign(measurements, measurements+num_objectives); + entry.cache_dt = cache_expire_dt_ms; + entry.cache_ts = ts_now; + + cache[entry.knobs] = entry; + + if ( score_expand < score_reflect ){ + // VV: fe < fr then replace v[n] with ve + for (auto i=0ul; i NmdGeneric::do_reflect(const double measurements[], + std::size_t observed_knobs[]) +{ + ensure_profile_consistency(point_reflect, observed_knobs); + score_reflect = score(measurements); + + OUT_DEBUG( + std::cout << "[NMD|Dbg] INNER Reflect: "; + for (auto i=0ul; 
i(std::chrono::system_clock::now()).time_since_epoch().count(); + + logistics entry; + entry.knobs.assign(observed_knobs, observed_knobs+num_knobs); + entry.objectives.assign(measurements, measurements+num_objectives); + entry.cache_dt = cache_expire_dt_ms; + entry.cache_ts = ts_now; + + cache[entry.knobs] = entry; + + if ( score_reflect >= scores[0] && score_reflect < scores[num_knobs-1]) { + // VV: fo <= fr < f[n-1] then replace v[n] with vr and start over + for ( auto i=0ul; i(); + key.assign(point_expand, point_expand+num_knobs); + auto e = cache.find(key); + + if ( e != cache.end() ) { + if ( ts_now - e->second.cache_ts < e->second.cache_dt ) { + return do_expand(e->second.objectives.data(), + e->second.knobs.data()); + } + } + + return key; + } else if (scores[num_knobs-1] <= score_reflect + && score_reflect < scores[num_knobs]) { + // VV: Reflect lies between f[n-1] and f[n] then contract (outside) + current_state = contract_out; + double temp[num_knobs]; + + for (auto i=0ul; i(); + key.assign(point_contract, point_contract+num_knobs); + auto e = cache.find(key); + + if ( e != cache.end() ) { + if ( ts_now - e->second.cache_ts < e->second.cache_dt ) { + return do_contract_out(e->second.objectives.data(), + e->second.knobs.data()); + } + } + + return key; + } else if (score_reflect >= scores[num_knobs]) { + // VV: Reflect > f[n] then contract (inside) + current_state = contract_in; + double temp[num_knobs]; + + for (auto i=0ul; i(); + key.assign(point_contract, point_contract+num_knobs); + auto e = cache.find(key); + + if ( e != cache.end() ) { + if ( ts_now - e->second.cache_ts < e->second.cache_dt ) { + return do_contract_in(e->second.objectives.data(), + e->second.knobs.data()); + } + } + + return key; + } + + OUT_INFO( + std::cout << "[NMD|Info] Should never get here" << std::endl; + ) + + current_state = start; + return do_start(true); +} + +std::vector NmdGeneric::do_warmup(const double measurements[], + std::size_t observed_knobs[]) +{ + std::vector 
ret; + OUT_DEBUG( + std::cout << "[NMD|Dbg] INNER warmup" << std::endl; + ) + + if ( warmup_step > 0 ) { + auto last = warmup_step - 1; + ensure_profile_consistency(initial_config[last], observed_knobs); + memcpy(simplex[last], initial_config[last], sizeof(std::size_t)*num_knobs); + scores[last] = score(measurements); + auto key = std::vector(); + key.assign(observed_knobs, observed_knobs+num_knobs); + + logistics entry; + + entry.cache_dt = cache_expire_dt_ms; + entry.cache_ts = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + entry.knobs.assign(observed_knobs, observed_knobs+num_knobs); + entry.objectives.assign(measurements, measurements+num_objectives); + + cache[key] = entry; + + OUT_DEBUG( + auto s = score(measurements); + std::cout << "[NMD|Dbg] Score: " << s << " for "; + for( auto i=0ul; i, bool> NmdGeneric::get_next(const double measurements[], + std::size_t observed_knobs[]) +{ + std::vector ret; + #if defined(NMD_DEBUG_) || defined(NMD_INFO_) + const char *state_names[] = { + "warmup", + "start", + "reflect", + "expand", + "contract_in", + "contract_out", + "shrink" + }; + #endif + + OUT_DEBUG( + std::cout << "[NMD|Dbg] Current stage " << state_names[current_state] << std::endl; + ) + + switch (current_state) { + case warmup: + ret = do_warmup(measurements, observed_knobs); + break; + case start: + times_reentered_start = 0; + ret = do_start(true); + break; + case reflect: + ret = do_reflect(measurements, observed_knobs); + break; + case expand: + ret = do_expand(measurements, observed_knobs); + break; + case contract_in: + ret = do_contract_in(measurements, observed_knobs); + break; + case contract_out: + ret = do_contract_out(measurements, observed_knobs); + break; + case shrink: + ret = do_shrink(); + break; + default: + std::cout << "Unknown state!" 
<< std::endl; + } + + OUT_INFO( + std::cout << "[NMD|Info] State " << state_names[current_state] << " proposes "; + + for (auto i=0ul; i= max_iters || sum <= conv_threshold ) { + // if ( final_explore == false ) { + // final_explore = true; + + // return false; + // } else { + // return true; + // } + OUT_INFO( + std::cout << "[NMD|Info] Converged at " << sum + << " threshold: " << conv_threshold << std::endl; + + std::cout << "[NMD|Info] Converged simplex" << std::endl; + for ( auto i=0ul; i global_optimizer::balance_ino_nmd(const std::vector num_active_nodes ) new_num_nodes = num_active_nodes; @@ -778,6 +784,12 @@ hpx::future global_optimizer::balance_ino_nmd(const std::vector(localities_, new_mapping); + + for (auto i=0u; i Date: Mon, 26 Nov 2018 16:36:26 +0000 Subject: [PATCH 32/37] Minor improvements to NmdGeneric and alternative tuned based on NMD - NmdGeneric accepts a score-function that's a parameter - Reverted default score function to speed/efficiency/power (dashboard) - Fixed a couple of small bugs in nmsimplex_bbincr (for use with legacy global and local NMD optimizers which rely on time/threads/power) --- allscale/components/nmd.hpp | 45 +++- allscale/components/nmsimplex_bbincr.hpp | 10 +- allscale/tuner.hpp | 19 ++ src/CMakeLists.txt | 1 + src/components/localoptimizer.cpp | 8 +- src/components/nmd.cpp | 60 ++--- src/components/nmsimplex_bbincr.cpp | 279 ++++++++++++----------- src/dashboard.cpp | 2 +- src/optimizer.cpp | 22 +- src/tuner.cpp | 103 +++++++++ 10 files changed, 359 insertions(+), 190 deletions(-) diff --git a/allscale/components/nmd.hpp b/allscale/components/nmd.hpp index 4f18ea2..7f462ff 100644 --- a/allscale/components/nmd.hpp +++ b/allscale/components/nmd.hpp @@ -18,10 +18,12 @@ N=2, 3, 4. 
Each edge may be at most @max_distance_long (see generate_unique) for author: vasiliadis.vasilis@gmail.com */ #ifndef ALLSCALE_NMD_HEADER +#define ALLSCALE_NMD_HEADER #include #include #include #include +#include namespace allscale { namespace components { @@ -46,32 +48,51 @@ class NmdGeneric { NmdGeneric(std::size_t num_knobs, std::size_t num_objectives, double conv_threshold, int64_t cache_expire_dt_ms, std::size_t max_iters); - NmdGeneric(const NmdGeneric& other); - void initialize(std::size_t constraint_min[], std::size_t constraint_max[], - std::size_t *initial_config[], double weights[]); + static double score_speed_efficiency_power(const double measurements[], const double weights[]) + { + double ret = std::pow(measurements[0], weights[0]) * + std::pow(measurements[1], weights[1]) * + std::pow((1-measurements[2]), weights[2]); + + if ( std::isfinite(ret) == 0 || ret > 1.0 ) { + ret = 1.0; + } + + return 1.0 - ret; + } + + void initialize(const std::size_t constraint_min[], const std::size_t constraint_max[], + const std::size_t *initial_config[], const double weights[], + double (*score_function)(const double[], const double [])); void ensure_profile_consistency(std::size_t expected[], const std::size_t observed[]) const; - void set_constraints_now(std::size_t constraint_min[], std::size_t constraint_max[]); + void set_constraints_now(const std::size_t constraint_min[], + const std::size_t constraint_max[]); double score(const double measurements[]) const; std::pair, bool> get_next(const double measurements[], - std::size_t observed_knobs[]); + const std::size_t observed_knobs[]); -// protected: +protected: bool test_convergence(); + + // VV: (measurements, weights) returns value in range [0.0, infinite) + // 0.0 means perfect score (i.e. 
the larger the score, the worse it is) + double (*score_function)(const double[], const double []); + std::vector do_warmup(const double measurements[], - std::size_t observed_knobs[]); + const std::size_t observed_knobs[]); std::vector do_reflect(const double measurements[], - std::size_t observed_knobs[]); + const std::size_t observed_knobs[]); std::vector do_expand(const double measurements[], - std::size_t observed_knobs[]); + const std::size_t observed_knobs[]); std::vector do_contract_in(const double measurements[], - std::size_t observed_knobs[]); - std::vector do_contract_out(const double measurements[], - std::size_t observed_knobs[]); + const std::size_t observed_knobs[]); + std::vector do_contract_out(const double measurements[], + const std::size_t observed_knobs[]); std::vector do_shrink(); std::vector do_start(bool consult_cache); diff --git a/allscale/components/nmsimplex_bbincr.hpp b/allscale/components/nmsimplex_bbincr.hpp index 11e5d09..891938d 100644 --- a/allscale/components/nmsimplex_bbincr.hpp +++ b/allscale/components/nmsimplex_bbincr.hpp @@ -81,7 +81,8 @@ enum iterationstates start, reflection, expansion, - contraction, + contraction_in, + contraction_out, shrink }; @@ -165,9 +166,12 @@ class NelderMead double knob1, double knob2); optstepresult do_step_expand(const double objectives[], double knob1, double knob2); - optstepresult do_step_contract(const double objectives[], + optstepresult do_step_contract_in(const double objectives[], double knob1, double knob2); - optstepresult do_step_shrink(const double objectives[], + optstepresult do_step_contract_out(const double objectives[], + double knob1, double knob2); + optstepresult do_step_shrink(); + optstepresult do_step_warmup(const double objectives[], double knob1, double knob2); void sort_vertices(void); diff --git a/allscale/tuner.hpp b/allscale/tuner.hpp index da28253..f1285a8 100644 --- a/allscale/tuner.hpp +++ b/allscale/tuner.hpp @@ -3,6 +3,7 @@ #define ALLSCALE_TUNER_HPP #include 
+#include #include #include @@ -74,6 +75,24 @@ namespace allscale { void next_direction(); }; + + struct nmd_optimizer : tuner + { + nmd_optimizer(std::size_t nodes_min, std::size_t nodes_max); + components::NmdGeneric nmd; + std::vector avail_freqs; + std::vector best; + bool converged; + bool initialized; + // VV: even though NmdGeneric supports arbitrary number of optimization parameters + // we're applying it to number of nodes and CPU frequency, it is trivial to + // add number of threads + std::size_t constraint_min[2], constraint_max[2]; + + tuner_configuration next(tuner_configuration const& current_cfg, tuner_state const& current_state, tuning_objective) override; + + double previous_weights[3]; + }; } #endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 25cf7c9..1481fbf 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -25,6 +25,7 @@ set(_srcs components/util/graph_colouring.cpp components/localoptimizer.cpp components/nmsimplex_bbincr.cpp + components/nmd.cpp ) if(CPUFREQ_FOUND) diff --git a/src/components/localoptimizer.cpp b/src/components/localoptimizer.cpp index d002112..52989d8 100644 --- a/src/components/localoptimizer.cpp +++ b/src/components/localoptimizer.cpp @@ -189,7 +189,7 @@ void localoptimizer::setmaxthreads(std::size_t threads) if ( min_threads < 1 ) min_threads = 1; - double constraint_min[] = {min_threads, min_freq}; + double constraint_min[] = {(double) min_threads, (double) min_freq}; #if defined(ALLSCALE_HAVE_CPUFREQ) double constraint_max[] = {ceil(max_threads_/(double)threads_dt), (double)max_freq}; @@ -226,7 +226,7 @@ void localoptimizer::initialize_nmd(bool from_scratch) min_threads = 1; int max_threads = max_threads_; - double constraint_min[] = {min_threads, min_freq}; + double constraint_min[] = { (double) min_threads, (double) min_freq}; #if defined(ALLSCALE_HAVE_CPUFREQ) double constraint_max[] = {ceil(max_threads_/(double)threads_dt), (double)max_freq}; @@ -251,7 +251,7 @@ void 
localoptimizer::initialize_nmd(bool from_scratch) } else { if ( time_weight >= energy_weight + resource_weight ) { double initial_simplex[3][2] = { - {min_threads, constraint_min[1]}, + {(double) min_threads, constraint_min[1]}, {max_threads/2.0, (constraint_min[1]+constraint_max[1])/2.0}, {(min_threads+max_threads)/2., constraint_max[1]} }; @@ -261,7 +261,7 @@ void localoptimizer::initialize_nmd(bool from_scratch) constraint_max); } else { double initial_simplex[3][2] = { - {min_threads, constraint_min[1]}, + {(double) min_threads, constraint_min[1]}, {max_threads/2.0, (constraint_min[1]+constraint_max[1])/2.0}, {(min_threads+max_threads)/2., constraint_max[1]} }; diff --git a/src/components/nmd.cpp b/src/components/nmd.cpp index 9bae1cf..5893b5b 100644 --- a/src/components/nmd.cpp +++ b/src/components/nmd.cpp @@ -7,8 +7,8 @@ #include -// #define NMD_DEBUG_ -// #define NMD_INFO_ +#define NMD_DEBUG_ +#define NMD_INFO_ #ifdef NMD_DEBUG_ #define OUT_DEBUG(X) X @@ -68,35 +68,30 @@ max_iters(max_iters) weights = new double [num_objectives]; } -NmdGeneric::NmdGeneric(const NmdGeneric& other) -{ - -} - double NmdGeneric::score(const double measurements[]) const { - double ret = std::pow(measurements[0], weights[0]) * - std::pow(measurements[1], weights[1]) * - std::pow((1-measurements[2]), weights[2]); - - if ( std::isfinite(ret) == 0 || ret > 1.0 ) { - ret = 1.0; - } - return 1.0 - ret; + return (*score_function)(measurements, weights); } -void NmdGeneric::initialize(std::size_t constraint_min[], std::size_t constraint_max[], - std::size_t *initial_config[], double weights[]) +void NmdGeneric::initialize(const std::size_t constraint_min[], + const std::size_t constraint_max[], + const std::size_t *initial_config[], + const double weights[], double (*score_function)(const double[], const double [])) { for (auto i=0ul; iweights[i] = weights[i]; + + this->score_function = score_function; set_constraints_now(constraint_min, constraint_max); iteration = 0; - if ( 
initial_config == nullptr ) { std::set > fake; + + OUT_INFO( + std::cout << "[NMD|Info] Generating initial config for " << num_knobs << std::endl; + ) for (auto i=0ul; iinitial_config[i][j] = std::rand() % width + constraint_min[j]; } - generate_unique(this->initial_config[i], true, &fake); + generate_unique(this->initial_config[i], false, &fake); auto new_key = std::vector(); new_key.assign(this->initial_config[i], this->initial_config[i]+num_knobs); fake.insert(new_key); @@ -131,8 +126,8 @@ void NmdGeneric::initialize(std::size_t constraint_min[], std::size_t constraint times_reentered_start = 0; } -void NmdGeneric::set_constraints_now(std::size_t constraint_min[], - std::size_t constraint_max[]) +void NmdGeneric::set_constraints_now(const std::size_t constraint_min[], + const std::size_t constraint_max[]) { for (auto i=0ul; iconstraint_max[i] = constraint_max[i]; @@ -170,6 +165,11 @@ void NmdGeneric::generate_unique(std::size_t initial[], bool accept_stale=false, if ( entry == cache.end() ) { candidates.insert(key); } else { + std::cout << "Found "; + for (auto i=0ul; isecond.cache_ts; if (accept_stale==false || (dt >= entry->second.cache_dt && cache_expire_dt_ms > 0) ) { @@ -366,7 +366,7 @@ std::vector NmdGeneric::do_start(bool consult_cache=true) std::cout << "[NMD|Dbg] INNER start" << std::endl; ) iteration ++; - sort_simplex(consult_cache); + sort_simplex(false); compute_centroid(); double temp[num_knobs]; @@ -457,7 +457,7 @@ std::vector NmdGeneric::do_shrink() } std::vector NmdGeneric::do_contract_out(const double measurements[], - std::size_t observed_knobs[]) + const std::size_t observed_knobs[]) { ensure_profile_consistency(point_contract, observed_knobs); score_contract = score(measurements); @@ -494,7 +494,7 @@ std::vector NmdGeneric::do_contract_out(const double measurements[] } std::vector NmdGeneric::do_contract_in(const double measurements[], - std::size_t observed_knobs[]) + const std::size_t observed_knobs[]) { 
ensure_profile_consistency(point_contract, observed_knobs); score_contract = score(measurements); @@ -532,7 +532,7 @@ std::vector NmdGeneric::do_contract_in(const double measurements[], std::vector NmdGeneric::do_expand(const double measurements[], - std::size_t observed_knobs[]) + const std::size_t observed_knobs[]) { ensure_profile_consistency(point_expand, observed_knobs); score_expand = score(measurements); @@ -571,7 +571,7 @@ std::vector NmdGeneric::do_expand(const double measurements[], } std::vector NmdGeneric::do_reflect(const double measurements[], - std::size_t observed_knobs[]) + const std::size_t observed_knobs[]) { ensure_profile_consistency(point_reflect, observed_knobs); score_reflect = score(measurements); @@ -688,7 +688,7 @@ std::vector NmdGeneric::do_reflect(const double measurements[], } std::vector NmdGeneric::do_warmup(const double measurements[], - std::size_t observed_knobs[]) + const std::size_t observed_knobs[]) { std::vector ret; OUT_DEBUG( @@ -757,7 +757,7 @@ std::vector NmdGeneric::do_warmup(const double measurements[], } std::pair, bool> NmdGeneric::get_next(const double measurements[], - std::size_t observed_knobs[]) + const std::size_t observed_knobs[]) { std::vector ret; #if defined(NMD_DEBUG_) || defined(NMD_INFO_) @@ -817,8 +817,10 @@ std::pair, bool> NmdGeneric::get_next(const double meas converged = test_convergence(); } - if ( converged ) + if ( converged ) { + sort_simplex(true); ret.assign(simplex[0], simplex[0] + num_knobs); + } return std::make_pair(ret, converged); } diff --git a/src/components/nmsimplex_bbincr.cpp b/src/components/nmsimplex_bbincr.cpp index 851156d..430afac 100644 --- a/src/components/nmsimplex_bbincr.cpp +++ b/src/components/nmsimplex_bbincr.cpp @@ -857,7 +857,7 @@ optstepresult NelderMead::do_step_reflect(const double objectives[], res.threads = vc[0]; res.freq_idx = vc[1]; - state_ = contraction; + state_ = contraction_out; auto key = std::make_pair(res.threads, res.freq_idx); @@ -870,7 +870,7 @@ 
optstepresult NelderMead::do_step_reflect(const double objectives[], if (dt < entry->second._cache_expires_dt) { - return do_step_contract(entry->second.objectives, + return do_step_contract_out(entry->second.objectives, entry->second.threads, entry->second.freq_idx); } @@ -896,7 +896,7 @@ optstepresult NelderMead::do_step_reflect(const double objectives[], res.threads = vc[0]; res.freq_idx = vc[1]; - state_ = contraction; + state_ = contraction_in; auto key = std::make_pair(res.threads, res.freq_idx); auto entry = cache_.find(key); @@ -908,7 +908,7 @@ optstepresult NelderMead::do_step_reflect(const double objectives[], if (dt < entry->second._cache_expires_dt) { - return do_step_contract(entry->second.objectives, + return do_step_contract_in(entry->second.objectives, entry->second.threads, entry->second.freq_idx); } @@ -976,12 +976,12 @@ optstepresult NelderMead::do_step_expand(const double objectives[], return do_step_start(); } -optstepresult NelderMead::do_step_contract(const double objectives[], +optstepresult NelderMead::do_step_contract_in(const double objectives[], double knob1, double knob2) { int j; #ifdef NMD_DEBUG_ - std::cout << "[NelderMead|DEBUG] State = Contraction" << std::endl; + std::cout << "[NelderMead|DEBUG] State = ContractionIN" << std::endl; #endif fc = evaluate_score(objectives, nullptr); @@ -1007,11 +1007,11 @@ optstepresult NelderMead::do_step_contract(const double objectives[], cache_update((int)vc[0], (int)vc[1], objectives, true); } - if (fc <= fr) + if (fc <= f[NMD_NUM_KNOBS]) { - // VV: CONTRACTED_O is better than REFLECTED - // Replace WORST with CONTRACTED_O - for (j = 0; j <= n - 1; j++) + // VV: CONTRACTED_I is better than WORST + // Replace WORST with CONTRACTED_I + for (j = 0; j < NMD_NUM_KNOBS; j++) { v[vg][j] = vc[j]; } @@ -1025,87 +1025,159 @@ optstepresult NelderMead::do_step_contract(const double objectives[], } else { - // VV: Replace SECOND BEST - double new_vh[NMD_NUM_KNOBS]; - - auto gen_new = [this, &new_vh](double 
*extra) mutable -> double* { - for (auto j = 0; j < NMD_NUM_KNOBS; j++) - new_vh[j] = v[vs][j] + DELTA * (v[vh][j] - v[vs][j]) - extra[j]; - - my_constraints(new_vh); - - return new_vh; - }; - - generate_new(gen_new); - - for (j = 0; j < NMD_NUM_KNOBS; j++) - v[vh][j] = new_vh[j]; - - // VV: Now evaluate SHRINK - - optstepresult res; - res.threads = v[vh][0]; - res.freq_idx = v[vh][1]; state_ = shrink; - - auto key = std::make_pair(res.threads, res.freq_idx); - - auto entry = cache_.find(key); - - if (entry != cache_.end()) - { - auto timestamp_now = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); - auto dt = timestamp_now - entry->second._cache_timestamp; - - if (dt < entry->second._cache_expires_dt) - { - return do_step_shrink(entry->second.objectives, - entry->second.threads, - entry->second.freq_idx); - } - } - - return res; + return do_step_shrink(); } } -optstepresult NelderMead::do_step_shrink(const double objectives[], - double knob1, double knob2) +optstepresult NelderMead::do_step_contract_out(const double objectives[], + double knob1, double knob2) { + int j; #ifdef NMD_DEBUG_ - std::cout << "[NelderMead|DEBUG] State = Shrink" << std::endl; + std::cout << "[NelderMead|DEBUG] State = ContractionOUT" << std::endl; #endif - f[vh] = evaluate_score(objectives, nullptr); + fc = evaluate_score(objectives, nullptr); double profiled[] = {knob1, knob2}; my_constraints(profiled); - if ( v[vh][0] != profiled[0] || v[vh][1] != profiled[1] ) { - std::cout << "[NelderMead|WARN] Meant to profile shrink " << v[vh][0] << " knob1 " + if ( vc[0] != profiled[0] || vc[1] != profiled[1] ) { + std::cout << "[NelderMead|WARN] Meant to profile contract " << vc[0] << " knob1 " "but ended up using " << profiled[0] << std::endl; - std::cout << "[NelderMead|WARN] Meant to profile shrink " << v[vh][1] << " knob2 " + std::cout << "[NelderMead|WARN] Meant to profile contract " << vc[1] << " knob2 " "but ended up using " << profiled[1] << 
std::endl; - auto key = std::make_pair((int)v[vh][0], (int)v[vh][1]); + auto key = std::make_pair((int)vc[0], (int)vc[1]); auto iter = cache_.find(key); if ( iter != cache_.end() ) { iter->second.threads = profiled[0]; iter->second.freq_idx = profiled[1]; } - v[vh][0] = profiled[0]; - v[vh][1] = profiled[1]; + vc[0] = profiled[0]; + vc[1] = profiled[1]; - cache_update((int)v[vh][0], (int)v[vh][1], objectives, true); + cache_update((int)vc[0], (int)vc[1], objectives, true); } - const int threads = (int)(v[vh][0]); - const int freq_idx = (int)(v[vh][1]); + if (fc <= fr) + { + // VV: CONTRACTED_O is better than REFLECTED + // Replace WORST with CONTRACTED_O + for (j = 0; j < NMD_NUM_KNOBS; j++) + { + v[vg][j] = vc[j]; + } + f[vg] = fc; - cache_update(threads, freq_idx, objectives, true); + const int threads = (int)(v[vg][0]); + const int freq_idx = (int)(v[vg][1]); - return do_step_start(); + cache_update(threads, freq_idx, objectives, true); + return do_step_start(); + } + else + { + state_ = shrink; + return do_step_shrink(); + } +} + +optstepresult NelderMead::do_step_shrink() +{ +#ifdef NMD_DEBUG_ + std::cout << "[NelderMead|DEBUG] State = Shrink" << std::endl; +#endif + for (auto i=0ul; i double* { + for (j = 0; j < NMD_NUM_KNOBS; j++) + vr[j] = vm[j] + DELTA * (v[i][j] - vm[j]) - extra[j]; + + my_constraints(vr); + + return vr; + }; + + generate_new(gen_new); + } + + state_ = warmup; + warming_up_step = 0; + return do_step_warmup({}, 0, 0); +} + +optstepresult NelderMead::do_step_warmup(const double objectives[], + double knob1, double knob2) +{ + #ifdef NMD_DEBUG_ + std::cout << "[NelderMead|DEBUG] State = Warmup " + << warming_up_step << std::endl; + #endif + + OUT_DEBUG( + if ( warming_up_step == 0 ) { + std::cout << "[NelderMead|DEBUG] Initial exploration" << std::endl; + + for ( auto i =0; i 0 && warming_up_step <= NMD_NUM_KNOBS + 1) { + double profiled[] = {knob1, knob2}; + my_constraints(profiled); + + if ( v[warming_up_step-1][0] != profiled[0] || 
v[warming_up_step-1][1] != profiled[1] ) { + std::cout << "[NelderMead|WARN] Meant to profile expand " << v[warming_up_step-1][0] << " knob1 " + "but ended up using " << profiled[0] << std::endl; + std::cout << "[NelderMead|WARN] Meant to profile expand " << v[warming_up_step-1][1] << " knob2 " + "but ended up using " << profiled[1] << std::endl; + + auto key = std::make_pair((int)v[warming_up_step-1][0], (int)v[warming_up_step-1][1]); + auto iter = cache_.find(key); + if ( iter != cache_.end() ) { + iter->second.threads = profiled[0]; + iter->second.freq_idx = profiled[1]; + } + + v[warming_up_step-1][0] = profiled[0]; + v[warming_up_step-1][1] = profiled[1]; + } + + // VV: Record results of last warming up step + f[warming_up_step-1] = evaluate_score(objectives, nullptr); + cache_update(v[warming_up_step-1][0], v[warming_up_step-1][1], + objectives, true); + } + + if ( warming_up_step == NMD_NUM_KNOBS + 1) { + // VV: We need not explore the knob_set space anymore + state_ = start; + return step(objectives, knob1, knob2); + } else if (warming_up_step > NMD_NUM_KNOBS + 1) { + std::cout << "[NelderMead|Warn] Unknown warmup step " << warming_up_step << std::endl; + } + optstepresult res; + + res.objectives[0] = -1; + res.objectives[1] = -1; + res.objectives[2] = -1; + res.converged = false; + + res.threads = initial_configurations[warming_up_step][0]; + res.freq_idx = initial_configurations[warming_up_step][1]; + + v[warming_up_step][0] = res.threads; + v[warming_up_step][1] = res.freq_idx; + warming_up_step++; + + return res; } optstepresult NelderMead::step(const double objectives[], @@ -1161,72 +1233,7 @@ optstepresult NelderMead::step(const double objectives[], { case warmup: { - #ifdef NMD_DEBUG_ - std::cout << "[NelderMead|DEBUG] State = Warmup " - << warming_up_step << std::endl; - #endif - - OUT_DEBUG( - if ( warming_up_step == 0 ) { - std::cout << "[NelderMead|DEBUG] Initial exploration" << std::endl; - - for ( auto i =0; i 0 && warming_up_step <= 
NMD_NUM_KNOBS + 1) { - double profiled[] = {knob1, knob2}; - my_constraints(profiled); - - if ( v[warming_up_step-1][0] != profiled[0] || v[warming_up_step-1][1] != profiled[1] ) { - std::cout << "[NelderMead|WARN] Meant to profile expand " << v[warming_up_step-1][0] << " knob1 " - "but ended up using " << profiled[0] << std::endl; - std::cout << "[NelderMead|WARN] Meant to profile expand " << v[warming_up_step-1][1] << " knob2 " - "but ended up using " << profiled[1] << std::endl; - - auto key = std::make_pair((int)v[warming_up_step-1][0], (int)v[warming_up_step-1][1]); - auto iter = cache_.find(key); - if ( iter != cache_.end() ) { - iter->second.threads = profiled[0]; - iter->second.freq_idx = profiled[1]; - } - - v[warming_up_step-1][0] = profiled[0]; - v[warming_up_step-1][1] = profiled[1]; - } - - // VV: Record results of last warming up step - f[warming_up_step-1] = evaluate_score(objectives, nullptr); - cache_update(v[warming_up_step-1][0], v[warming_up_step-1][1], - objectives, true); - } - - if ( warming_up_step == NMD_NUM_KNOBS + 1) { - // VV: We need not explore the knob_set space anymore - state_ = start; - return step(objectives, knob1, knob2); - } else if (warming_up_step > NMD_NUM_KNOBS + 1) { - std::cout << "[NelderMead|Warn] Unknown warmup step " << warming_up_step << std::endl; - } - - res.objectives[0] = -1; - res.objectives[1] = -1; - res.objectives[2] = -1; - res.converged = false; - - res.threads = initial_configurations[warming_up_step][0]; - res.freq_idx = initial_configurations[warming_up_step][1]; - - v[warming_up_step][0] = res.threads; - v[warming_up_step][1] = res.freq_idx; - warming_up_step++; - + res = do_step_warmup(objectives, knob1, knob2); break; } break; @@ -1240,11 +1247,11 @@ optstepresult NelderMead::step(const double objectives[], case expansion: res = do_step_expand(objectives, knob1, knob2); break; - case contraction: - res = do_step_contract(objectives, knob1, knob2); + case contraction_in: + res = 
do_step_contract_in(objectives, knob1, knob2); break; - case shrink: - res = do_step_shrink(objectives, knob1, knob2); + case contraction_out: + res = do_step_contract_out(objectives, knob1, knob2); break; default: std::cout << "Unknown NelderMead state (" << state_ << ")" << std::endl; diff --git a/src/dashboard.cpp b/src/dashboard.cpp index 52a6890..8de511f 100644 --- a/src/dashboard.cpp +++ b/src/dashboard.cpp @@ -24,7 +24,7 @@ // VV: Define this to use time/energy/resources instead of speed/energy/efficiency -#define ALTERNATIVE_SCORE +// #define ALTERNATIVE_SCORE namespace allscale { namespace dashboard { diff --git a/src/optimizer.cpp b/src/optimizer.cpp index 4b29f05..389aa5e 100644 --- a/src/optimizer.cpp +++ b/src/optimizer.cpp @@ -198,7 +198,7 @@ float estimate_power(float frequency) global_optimizer::global_optimizer() : u_balance_every(10), u_steps_till_rebalance(u_balance_every), - active_nodes_(allscale::get_num_localities(), true), tuner_(new simple_coordinate_descent(tuner_configuration{active_nodes_, allscale::monitor::get().get_current_freq(0)})), + active_nodes_(allscale::get_num_localities(), true), objective_(get_default_objective()), active_(true), localities_(hpx::find_all_localities()), f_resource_max(-1.0f), f_resource_leeway(-1.0f), @@ -208,6 +208,8 @@ global_optimizer::global_optimizer() last_optimization_score(1.0) { char *const c_policy = std::getenv("ALLSCALE_SCHEDULING_POLICY"); + char *const c_tuner = std::getenv("ALLSCALE_TUNER"); + std::string input_objective_str = hpx::get_config_entry("allscale.objective", ""); @@ -237,12 +239,13 @@ global_optimizer::global_optimizer() f_resource_max = atof(c_resource_max); nodes_min = f_resource_leeway * localities_.size(); - nodes_max = localities_.size(); - - if ( nodes_min < 1 ) - nodes_min = 1; } + nodes_max = localities_.size(); + + if ( nodes_min < 1 ) + nodes_min = 1; + if ( c_policy && strcasecmp(c_policy, "ino")) o_ino = allscale::components::internode_optimizer_t(localities_.size(), 
(double) f_resource_max, @@ -264,6 +267,15 @@ global_optimizer::global_optimizer() objectives_scale[0] = 0.5; objectives_scale[1] = 1.0; objectives_scale[2] = 1.0; + + if (c_policy && strcasecmp(c_policy, "neldermead")) { + std::cout << "Choosing NelderMead Optimizer for global optimization" << std::endl; + tuner_ = std::make_unique(nodes_min, nodes_max); + } + else { + std::cout << "Choosing Coordinate Descent Optimizer for global optimization" << std::endl; + tuner_ = std::make_unique(tuner_configuration{active_nodes_, allscale::monitor::get().get_current_freq(0)}); + } } double global_optimizer::get_optimization_score() diff --git a/src/tuner.cpp b/src/tuner.cpp index 546a2be..f3d421e 100644 --- a/src/tuner.cpp +++ b/src/tuner.cpp @@ -4,6 +4,8 @@ #include #include #include +#include + namespace allscale { std::ostream& operator<<(std::ostream& os, tuner_configuration const& cfg) @@ -204,4 +206,105 @@ namespace allscale { // print a status message std::cerr << "New search direction: " << (dim == num_nodes ? "#nodes" : "frequency") << " " << (dir == up ? 
"up" : "down") << "\n"; } + + nmd_optimizer::nmd_optimizer(std::size_t nodes_min, + std::size_t nodes_max) + : nmd(2, 3, 0.01, 2000, 50ul) + , converged(false) + { + constraint_min[0] = nodes_min; + constraint_max[0] = nodes_max; + + avail_freqs = monitor::get().get_available_freqs(0); + std::sort(avail_freqs.begin(), avail_freqs.end()); + + if ( avail_freqs.size() ) { + constraint_min[1] = 0; + constraint_max[1] = avail_freqs.size() - 1; + } else { + constraint_min[1] = 0; + constraint_max[1] = 0; + } + + previous_weights[0] = 0; + previous_weights[1] = 0; + previous_weights[2] = 0; + } + + tuner_configuration nmd_optimizer::next(tuner_configuration const& current_cfg, tuner_state const& current_state, tuning_objective obj) + { + tuner_configuration res; + auto action = std::vector(); + std::cout << "Initializing NMD" << std::endl; + + const double weights[] = { + obj.speed_exponent, obj.efficiency_exponent, obj.power_exponent + }; + + double diff = 0.0; + + for (auto i=0ul; i<3; ++i) + diff += abs(previous_weights[i] - weights[i]); + + if ( diff > 0.01 ) { + // VV: Enforce exploration + initialized = false; + this->converged = false; + } + + for (auto i=0ul; i<3; ++i) + previous_weights[i] = weights[i]; + + if ( initialized == false ){ + nmd.initialize(constraint_min, + constraint_max, + nullptr, + weights, + &nmd.score_speed_efficiency_power); + initialized = true; + } + + if ( this->converged == false ) { + double measurements[3] = {current_state.speed, current_state.efficiency, current_state.power}; + + std::size_t num_active_nodes = std::count(current_cfg.node_mask.begin(), + current_cfg.node_mask.end(), + true); + std::size_t freq_idx; + auto e = std::find(avail_freqs.begin(), avail_freqs.end(), current_cfg.frequency); + + if ( e == avail_freqs.end() ) + freq_idx = 0; + else + freq_idx = e - avail_freqs.begin(); + + const std::size_t observed[] = {num_active_nodes, freq_idx}; + auto ret = nmd.get_next(measurements, observed); + 
action.assign(ret.first.begin(), ret.first.end()); + auto converged = ret.second; + + if (converged) { + best.assign(action.begin(), action.end()); + this->converged = true; + } + } else { + action.assign(best.begin(), best.end()); + } + + res.node_mask.assign(current_cfg.node_mask.begin(), + current_cfg.node_mask.end()); + + for (auto i=0ul; i Date: Mon, 26 Nov 2018 17:02:31 +0000 Subject: [PATCH 33/37] Disabled verbose logging for NMD implementations --- src/components/nmd.cpp | 4 ++-- src/components/nmsimplex_bbincr.cpp | 3 +-- src/tuner.cpp | 1 - 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/components/nmd.cpp b/src/components/nmd.cpp index 5893b5b..bb59b1a 100644 --- a/src/components/nmd.cpp +++ b/src/components/nmd.cpp @@ -7,8 +7,8 @@ #include -#define NMD_DEBUG_ -#define NMD_INFO_ +//#define NMD_DEBUG_ +//#define NMD_INFO_ #ifdef NMD_DEBUG_ #define OUT_DEBUG(X) X diff --git a/src/components/nmsimplex_bbincr.cpp b/src/components/nmsimplex_bbincr.cpp index 430afac..82ae4e9 100644 --- a/src/components/nmsimplex_bbincr.cpp +++ b/src/components/nmsimplex_bbincr.cpp @@ -14,8 +14,7 @@ #include #include -#define NMD_DEBUG_ 1 -#define NMD_INFO_ 1 +//#define NMD_DEBUG_ 1 #ifdef NMD_DEBUG_ #define OUT_DEBUG(X) X diff --git a/src/tuner.cpp b/src/tuner.cpp index f3d421e..2203687 100644 --- a/src/tuner.cpp +++ b/src/tuner.cpp @@ -235,7 +235,6 @@ namespace allscale { { tuner_configuration res; auto action = std::vector(); - std::cout << "Initializing NMD" << std::endl; const double weights[] = { obj.speed_exponent, obj.efficiency_exponent, obj.power_exponent From 65f6a771163ae0706d985db80d2003e3971ae907 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Tue, 27 Nov 2018 09:28:51 +0000 Subject: [PATCH 34/37] Disabled more debug printouts (re-enable #defines to get the functionality back) --- allscale/components/localoptimizer.hpp | 6 +++--- src/components/localoptimizer.cpp | 11 ++++++----- src/components/scheduler_component.cpp | 4 ++-- 3 files 
changed, 11 insertions(+), 10 deletions(-) diff --git a/allscale/components/localoptimizer.hpp b/allscale/components/localoptimizer.hpp index 96d1f5f..9e11ebb 100644 --- a/allscale/components/localoptimizer.hpp +++ b/allscale/components/localoptimizer.hpp @@ -15,9 +15,9 @@ #include //#define MEASURE_MANUAL_ 1 -#define MEASURE_ 1 -#define DEBUG_ 1 -#define DEBUG_MULTIOBJECTIVE_ 1 +// #define MEASURE_ 1 +// #define DEBUG_ 1 +// #define DEBUG_MULTIOBJECTIVE_ 1 namespace allscale { diff --git a/src/components/localoptimizer.cpp b/src/components/localoptimizer.cpp index 52989d8..da51330 100644 --- a/src/components/localoptimizer.cpp +++ b/src/components/localoptimizer.cpp @@ -10,12 +10,12 @@ #include #include -#define DEBUG_ 1 +//#define DEBUG_ 1 //#define DEBUG_INIT_ 1 // define to generate output during scheduler initialization -#define DEBUG_MULTIOBJECTIVE_ 1 -#define DEBUG_CONVERGENCE_ 1 +//#define DEBUG_MULTIOBJECTIVE_ 1 +//#define DEBUG_CONVERGENCE_ 1 //#define MEASURE_MANUAL 1 // define to generate output consumed by the regression test -#define MEASURE_ 1 +//#define MEASURE_ 1 // only meant to be defined if one needs to measure the efficacy // of the scheduler //#define ALLSCALE_HAVE_CPUFREQ 1 @@ -111,7 +111,7 @@ bool localoptimizer::isConverged() #endif return converged_; } - +#ifdef DEBUG_ void localoptimizer::printverbosesteps(actuation act) { static int last_frequency_idx = 0; @@ -130,6 +130,7 @@ void localoptimizer::printverbosesteps(actuation act) std::cout << " , CPU Frequency to " << frequencies_param_allowed_[last_frequency_idx] << std::endl; } +#endif void localoptimizer::accumulate_objective_measurements() { diff --git a/src/components/scheduler_component.cpp b/src/components/scheduler_component.cpp index 2b865bf..3caa522 100644 --- a/src/components/scheduler_component.cpp +++ b/src/components/scheduler_component.cpp @@ -22,12 +22,12 @@ //#define DEBUG_ 1 //#define DEBUG_INIT_ 1 // define to generate output during scheduler initialization -#define 
DEBUG_MULTIOBJECTIVE_ 1 +// #define DEBUG_MULTIOBJECTIVE_ 1 //#define DEBUG_THREADTHROTTLING_ 1 //#define DEBUG_THREADSTATUS_ 1 //#define DEBUG_FREQSCALING_ 1 //#define MEASURE_MANUAL 1 // define to generate output consumed by the regression test -#define MEASURE_ 1 +// #define MEASURE_ 1 // only meant to be defined if one needs to measure the efficacy // of the scheduler #undef DEBUG_ From 36a95e282121b87485403f3ff60878b4699d7109 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Tue, 8 Jan 2019 11:12:23 +0000 Subject: [PATCH 35/37] Enable elasticity when *any* kind of objective is selected --- src/scheduler.cpp | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/src/scheduler.cpp b/src/scheduler.cpp index f1be5bc..688b682 100644 --- a/src/scheduler.cpp +++ b/src/scheduler.cpp @@ -75,7 +75,7 @@ namespace allscale std::string input_objective_str = hpx::get_config_entry("allscale.objective", "none"); // std::cerr << " Scheduler objective is " << input_objective_str << "\n"; - bool enable_elasticity = false; + bool enable_elasticity = true; if ( !input_objective_str.empty() ) { std::istringstream iss_leeways(input_objective_str); @@ -92,17 +92,6 @@ namespace allscale obj = objective_str.substr(0, idx); leeway = std::stod( objective_str.substr(idx + 1) ); } - - if (obj == "time") - { - enable_elasticity = true; - break; - } - else if (obj == "resource") - { - enable_elasticity = true; - break; - } } } From 0d518ed267ac63caa56076df10fb66da36a3c5bf Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Tue, 8 Jan 2019 11:14:22 +0000 Subject: [PATCH 36/37] Enable elasticity when *any* kind of objective is selected previous commit was incomplete ... 
--- src/scheduler.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/scheduler.cpp b/src/scheduler.cpp index 688b682..05a7479 100644 --- a/src/scheduler.cpp +++ b/src/scheduler.cpp @@ -75,7 +75,7 @@ namespace allscale std::string input_objective_str = hpx::get_config_entry("allscale.objective", "none"); // std::cerr << " Scheduler objective is " << input_objective_str << "\n"; - bool enable_elasticity = true; + bool enable_elasticity = false; if ( !input_objective_str.empty() ) { std::istringstream iss_leeways(input_objective_str); @@ -93,6 +93,8 @@ namespace allscale leeway = std::stod( objective_str.substr(idx + 1) ); } } + + enable_elasticity = true; } rp.set_default_pool_name("allscale-numa-0"); From 9264d07a897b684baedf3f7bf02a9b8e25951d65 Mon Sep 17 00:00:00 2001 From: Vassilis Vassiliadis Date: Tue, 8 Jan 2019 14:21:59 +0000 Subject: [PATCH 37/37] Fixed segmentation fault in power upkeeping --- src/components/scheduler_component.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/components/scheduler_component.cpp b/src/components/scheduler_component.cpp index 3caa522..1a5ae8e 100644 --- a/src/components/scheduler_component.cpp +++ b/src/components/scheduler_component.cpp @@ -675,14 +675,16 @@ void scheduler::optimize_locally(work_item const& work) active_threads}; lopt_.measureObjective(current_avg_iter_time,power_sum/(last_power_usage*monitor_c->get_max_power()), active_threads); - last_power_usage=0; - power_sum=0; last_objective_score = lopt_.evaluate_score(last_objectives); auto power_dt = t_duration_now - last_measure_power; update_power_consumption(power_sum/last_power_usage, power_dt); last_measure_power = t_duration_now; + + // VV: instead of starting from scratch, remember the last power measurement + last_power_usage=1; + power_sum=current_power_usage; } elapsedTimeMs = t_duration_now - last_optimization_timestamp_;