diff --git a/network/h_dynamic_cnn_gpu_cpp_v1/HDynamicCNNGPU.cu b/network/h_dynamic_cnn_gpu_cpp_v1/HDynamicCNNGPU.cu index 0eb402f..3a35aa4 100644 --- a/network/h_dynamic_cnn_gpu_cpp_v1/HDynamicCNNGPU.cu +++ b/network/h_dynamic_cnn_gpu_cpp_v1/HDynamicCNNGPU.cu @@ -3,11 +3,12 @@ #include #include #include - +#include #include #include #include +// #define DEBUGSHOWTIMEGLOBAL HDynamicCNNGPU::HDynamicCNNGPU() { @@ -111,6 +112,11 @@ void HDynamicCNNGPU::entrypoint( // -------------------- assert((number_of_processes <= 0)); +#ifdef DEBUGSHOWTIMEGLOBAL + using TIME_resolution = std::chrono::nanoseconds; + auto TIME_start = std::chrono::high_resolution_clock::now(); +#endif + gpu_update( h_init_ptr, h_pointer, @@ -136,6 +142,12 @@ void HDynamicCNNGPU::entrypoint( number_of_pattern, gpu_tuning_factor); +#ifdef DEBUGSHOWTIMEGLOBAL + auto TIME_end = std::chrono::high_resolution_clock::now(); + float TIME_measured = TIME_resolution(TIME_end - TIME_start).count(); + std::cout << "Time used : " << TIME_measured/(1000.0*1000.0) << "ms" << std::endl; +#endif + return; }; @@ -193,66 +205,69 @@ __device__ void gpu_update_one_ip( spike = input_pointer + counter_spike * input_dim_c1; - if (*spike >= 0) + if (*spike < 0) { - if (epsilon_xy_dim_c0 != 0) + break; + } + + if (epsilon_xy_dim_c0 != 0) + { + epsilon_subsegment = + epsilon_xy_pointer[*spike *epsilon_xy_dim_c0] * epsilon_t_pointer[counter_spike]; + } + else + { + epsilon_subsegment = epsilon_t_pointer[counter_spike]; + } + w_ptr = weights_pointer + *spike * weights_dim_c0; + + for (size_t counter = 0; counter < h_dim; counter++) + { + h_temp[counter] = h_subsegment[counter] * w_ptr[counter]; + } + + h_temp_sum = 0.0; + + for (size_t counter = 0; counter < h_dim; counter++) + { + h_temp_sum += h_temp[counter]; + } + + if (h_temp_sum > 1E-10) + { + temp_value = epsilon_scale * epsilon_subsegment / h_temp_sum; + + for (size_t counter = 0; counter < h_dim; counter++) { - epsilon_subsegment = - epsilon_xy_pointer[*spike *epsilon_xy_dim_c0] * epsilon_t_pointer[counter_spike]; + h_temp[counter] *= temp_value; + } + + for (size_t counter = 0; counter < h_dim; counter++) + { + h_subsegment[counter] += h_temp[counter]; + } + + if (forgetting_offset_local > 0.0) + { + temp_value = + epsilon_scale * epsilon_subsegment * forgetting_offset_local; + + for (size_t counter = 0; counter < h_dim; counter++) + { + h_subsegment[counter] += temp_value; + } + + epsilon_scale *= + 1.0 + epsilon_subsegment * (1.0 + forgetting_offset); } else { - epsilon_subsegment = epsilon_t_pointer[counter_spike]; - } - w_ptr = weights_pointer + *spike * weights_dim_c0; - - for (size_t counter = 0; counter < h_dim; counter++) - { - h_temp[counter] = h_subsegment[counter] * w_ptr[counter]; - } - - h_temp_sum = 0.0; - - for (size_t counter = 0; counter < h_dim; counter++) - { - h_temp_sum += h_temp[counter]; - } - - if (h_temp_sum > 1E-10) - { - temp_value = epsilon_scale * epsilon_subsegment / h_temp_sum; - - for (size_t counter = 0; counter < h_dim; counter++) - { - h_temp[counter] *= temp_value; - } - - for (size_t counter = 0; counter < h_dim; counter++) - { - h_subsegment[counter] += h_temp[counter]; - } - - if (forgetting_offset_local > 0.0) - { - temp_value = - epsilon_scale * epsilon_subsegment * forgetting_offset_local; - - for (size_t counter = 0; counter < h_dim; counter++) - { - h_subsegment[counter] += temp_value; - } - - epsilon_scale *= - 1.0 + epsilon_subsegment * (1.0 + forgetting_offset); - } - else - { - epsilon_scale *= 1.0 + epsilon_subsegment * 1.0; - } + epsilon_scale *= 1.0 + epsilon_subsegment * 1.0; } } } + temp_value = 1.0 / epsilon_scale; for (size_t counter = 0; counter < h_dim; counter++)