diff --git a/numpy/numba/README.md b/numpy/numba/README.md index 507e15c..35bd561 100644 --- a/numpy/numba/README.md +++ b/numpy/numba/README.md @@ -71,3 +71,433 @@ Some example for array signatures are: ## An example (up to 437x faster) +For measuring the time used by the program I ran everything twice and took the second time. I did this is because the just-in-time compilation takes a moment for the first call of a function. + +### Basis code (7.76 sec) + +This is the basic code without any optimizations. + +```python +import time +import numpy as np + + +def get_spike( + h: np.ndarray, number_of_neurons: np.uint64, random_number: np.float64 +) -> np.uint64: + + summation: np.float64 = np.float64(0.0) + + output: np.uint64 = np.uint64(number_of_neurons - 1) + + for i in range(0, np.uint64(number_of_neurons - 1)): + summation += h[i] + + if random_number <= summation: + output = np.uint64(i) + return output + + return output + + +def main( + number_of_iterations: np.uint64, + number_of_neurons: np.uint64, + random_number_spikes: np.ndarray, + random_number_h: np.ndarray, +) -> np.ndarray: + + results = np.zeros((number_of_iterations), dtype=np.uint64) + + for i in range(0, number_of_iterations): + h = random_number_h[i, :] + h /= h.sum() + results[i] = get_spike(h, number_of_neurons, random_number_spikes[i]) + + return results + + +if __name__ == "__main__": + number_of_iterations: np.uint64 = np.uint64(10000) + number_of_neurons: np.uint64 = np.uint64(10000) + myrng = np.random.default_rng() + + random_number_spikes = myrng.random((number_of_iterations), dtype=np.float64) + random_number_h = myrng.random( + (number_of_iterations, number_of_neurons), dtype=np.float64 + ) + + start_time = time.perf_counter() + results = main( + number_of_iterations=number_of_iterations, + number_of_neurons=number_of_neurons, + random_number_spikes=random_number_spikes, + random_number_h=random_number_h, + ) + end_time = time.perf_counter() + + check_for_errors = np.sum([results >= number_of_neurons]) + if check_for_errors > 0: + print("Something went really wrong! Panic!") + print(f"{end_time-start_time:.5f} sec") + print(results[0:10]) +``` + +### Optimization 1 (0.482sec) + + +We add just-in-time compilation to the function get_spike with @njit(cache=True). ["To avoid compilation times each time you invoke a Python program, you can instruct Numba to write the result of function compilation into a file-based cache."](https://numba.pydata.org/numba-doc/latest/user/jit.html#cache) + +```python +import time +import numpy as np +from numba import njit + + +@njit(cache=True) +def get_spike( + h: np.ndarray, number_of_neurons: np.uint64, random_number: np.float64 +) -> np.uint64: + + summation: np.float64 = np.float64(0.0) + + output: np.uint64 = np.uint64(number_of_neurons - 1) + + for i in range(0, np.uint64(number_of_neurons - 1)): + summation += h[i] + + if random_number <= summation: + output = np.uint64(i) + return output + + return output + + +def main( + number_of_iterations: np.uint64, + number_of_neurons: np.uint64, + random_number_spikes: np.ndarray, + random_number_h: np.ndarray, +) -> np.ndarray: + + results = np.zeros((number_of_iterations), dtype=np.uint64) + + for i in range(0, number_of_iterations): + h = random_number_h[i, :] + h /= h.sum() + results[i] = get_spike(h, number_of_neurons, random_number_spikes[i]) + + return results + + +if __name__ == "__main__": + number_of_iterations: np.uint64 = np.uint64(10000) + number_of_neurons: np.uint64 = np.uint64(10000) + myrng = np.random.default_rng() + + random_number_spikes = myrng.random((number_of_iterations), dtype=np.float64) + random_number_h = myrng.random( + (number_of_iterations, number_of_neurons), dtype=np.float64 + ) + + start_time = time.perf_counter() + results = main( + number_of_iterations=number_of_iterations, + number_of_neurons=number_of_neurons, + random_number_spikes=random_number_spikes, + random_number_h=random_number_h, + ) + end_time = time.perf_counter() + + check_for_errors = np.sum([results >= number_of_neurons]) + if check_for_errors > 0: + print("Something went really wrong! Panic!") + print(f"{end_time-start_time:.5f} sec") + print(results[0:10]) +``` + + +### Optimization 2 (0.627sec) + +We also add just-in-time compilation to the function main with @njit(cache=True). + +```python +import time +import numpy as np +from numba import njit + + +@njit(cache=True) +def get_spike( + h: np.ndarray, number_of_neurons: np.uint64, random_number: np.float64 +) -> np.uint64: + + summation: np.float64 = np.float64(0.0) + + output: np.uint64 = np.uint64(number_of_neurons - 1) + + for i in range(0, np.uint64(number_of_neurons - 1)): + summation += h[i] + + if random_number <= summation: + output = np.uint64(i) + return output + + return output + + +@njit(cache=True) +def main( + number_of_iterations: np.uint64, + number_of_neurons: np.uint64, + random_number_spikes: np.ndarray, + random_number_h: np.ndarray, +) -> np.ndarray: + + results = np.zeros((number_of_iterations), dtype=np.uint64) + + for i in range(0, number_of_iterations): + h = random_number_h[i, :] + h /= h.sum() + results[i] = get_spike(h, number_of_neurons, random_number_spikes[i]) + + return results + + +if __name__ == "__main__": + number_of_iterations: np.uint64 = np.uint64(10000) + number_of_neurons: np.uint64 = np.uint64(10000) + myrng = np.random.default_rng() + + random_number_spikes = myrng.random((number_of_iterations), dtype=np.float64) + random_number_h = myrng.random( + (number_of_iterations, number_of_neurons), dtype=np.float64 + ) + + start_time = time.perf_counter() + results = main( + number_of_iterations=number_of_iterations, + number_of_neurons=number_of_neurons, + random_number_spikes=random_number_spikes, + random_number_h=random_number_h, + ) + end_time = time.perf_counter() + + check_for_errors = np.sum([results >= number_of_neurons]) + if check_for_errors > 0: + print("Something went really wrong! Panic!") + print(f"{end_time-start_time:.5f} sec") + print(results[0:10]) +``` + +### Optimization 3 (0.619sec) + +We add [function signatures](https://numba.pydata.org/numba-doc/latest/reference/types.html) to the code with: + +```python +@njit( + numba.types.uint64(numba.types.float64[:], numba.types.uint64, numba.types.float64), + cache=True, +) +def get_spike( + h: np.ndarray, number_of_neurons: np.uint64, random_number: np.float64 +) -> np.uint64: + + +[...] + +@njit( + numba.types.uint64[:]( + numba.types.uint64, + numba.types.uint64, + numba.types.float64[:], + numba.types.float64[:, :], + ), + cache=True, +) +def main( + number_of_iterations: np.uint64, + number_of_neurons: np.uint64, + random_number_spikes: np.ndarray, + random_number_h: np.ndarray, +) -> np.ndarray: +``` + + +```python +import time +import numpy as np +from numba import njit +import numba + + +@njit( + numba.types.uint64(numba.types.float64[:], numba.types.uint64, numba.types.float64), + cache=True, +) +def get_spike( + h: np.ndarray, number_of_neurons: np.uint64, random_number: np.float64 +) -> np.uint64: + + summation: np.float64 = np.float64(0.0) + + output: np.uint64 = np.uint64(number_of_neurons - 1) + + for i in range(0, np.uint64(number_of_neurons - 1)): + summation += h[i] + + if random_number <= summation: + output = np.uint64(i) + return output + + return output + + +@njit( + numba.types.uint64[:]( + numba.types.uint64, + numba.types.uint64, + numba.types.float64[:], + numba.types.float64[:, :], + ), + cache=True, +) +def main( + number_of_iterations: np.uint64, + number_of_neurons: np.uint64, + random_number_spikes: np.ndarray, + random_number_h: np.ndarray, +) -> np.ndarray: + + results = np.zeros((number_of_iterations), dtype=np.uint64) + + for i in range(0, number_of_iterations): + h = random_number_h[i, :] + h /= h.sum() + results[i] = get_spike(h, number_of_neurons, random_number_spikes[i]) + + return results + + +if __name__ == "__main__": + number_of_iterations: np.uint64 = np.uint64(10000) + number_of_neurons: np.uint64 = np.uint64(10000) + myrng = np.random.default_rng() + + random_number_spikes = myrng.random((number_of_iterations), dtype=np.float64) + random_number_h = myrng.random( + (number_of_iterations, number_of_neurons), dtype=np.float64 + ) + + start_time = time.perf_counter() + results = main( + number_of_iterations=number_of_iterations, + number_of_neurons=number_of_neurons, + random_number_spikes=random_number_spikes, + random_number_h=random_number_h, + ) + end_time = time.perf_counter() + + check_for_errors = np.sum([results >= number_of_neurons]) + if check_for_errors > 0: + print("Something went really wrong! Panic!") + print(f"{end_time-start_time:.5f} sec") + print(results[0:10]) +``` + +### Optimization 4 (0.419sec) + +We tell numba about the [C memory layout](https://numba.pydata.org/numba-doc/latest/reference/types.html#arrays) of the arrays with refining the function signature: + +```python +@njit( + numba.types.uint64[::1]( + numba.types.uint64, + numba.types.uint64, + numba.types.float64[::1], + numba.types.float64[:, ::1], + ), + cache=True, +) +``` + +```python +import time +import numpy as np +from numba import njit +import numba + + +@njit( + numba.types.uint64(numba.types.float64[:], numba.types.uint64, numba.types.float64), + cache=True, +) +def get_spike( + h: np.ndarray, number_of_neurons: np.uint64, random_number: np.float64 +) -> np.uint64: + + summation: np.float64 = np.float64(0.0) + + output: np.uint64 = np.uint64(number_of_neurons - 1) + + for i in range(0, np.uint64(number_of_neurons - 1)): + summation += h[i] + + if random_number <= summation: + output = np.uint64(i) + return output + + return output + + +@njit( + numba.types.uint64[::1]( + numba.types.uint64, + numba.types.uint64, + numba.types.float64[::1], + numba.types.float64[:, ::1], + ), + cache=True, +) +def main( + number_of_iterations: np.uint64, + number_of_neurons: np.uint64, + random_number_spikes: np.ndarray, + random_number_h: np.ndarray, +) -> np.ndarray: + + results = np.zeros((number_of_iterations), dtype=np.uint64) + + for i in range(0, number_of_iterations): + h = random_number_h[i, :] + h /= h.sum() + results[i] = get_spike(h, number_of_neurons, random_number_spikes[i]) + + return results + + +if __name__ == "__main__": + number_of_iterations: np.uint64 = np.uint64(10000) + number_of_neurons: np.uint64 = np.uint64(10000) + myrng = np.random.default_rng() + + random_number_spikes = myrng.random((number_of_iterations), dtype=np.float64) + random_number_h = myrng.random( + (number_of_iterations, number_of_neurons), dtype=np.float64 + ) + + start_time = time.perf_counter() + results = main( + number_of_iterations=number_of_iterations, + number_of_neurons=number_of_neurons, + random_number_spikes=random_number_spikes, + random_number_h=random_number_h, + ) + end_time = time.perf_counter() + + check_for_errors = np.sum([results >= number_of_neurons]) + if check_for_errors > 0: + print("Something went really wrong! Panic!") + print(f"{end_time-start_time:.5f} sec") + print(results[0:10]) +```