"""How to create an estimate from ROC.

Tutorial script recovered from the README patch for ``numpy/roc/README.md``
(commit 13495af, "Update README.md").

Two classes are drawn from unit-variance normal distributions: class ``-1``
centred at 1.5 and class ``+1`` centred at 0.0. The samples are sorted by
value, one border is placed where the running sum of the class labels peaks,
and the two possible labellings around that border ("Estimate A" and
"Estimate B") are compared against the true classes.

The README recorded the output ``We will use: Estimate B`` /
``Performance: 77.3% correct`` for the original script, which placed the
border one sample too early; with the corrected border the printed
percentage may differ slightly.
"""

import numpy as np


def find_border(sorted_class):
    """Return how many leading samples belong below the border.

    ``sorted_class`` holds the ``-1`` / ``+1`` labels ordered by ascending
    data value. The returned ``k`` maximises ``sorted_class[:k].sum()`` and
    is meant to be used directly as the slice boundary ``[:k]`` / ``[k:]``.
    """
    # Prefix sums with a leading 0: entry k is the sum of the first k labels.
    # Using argmax of the plain cumsum as the slice boundary would leave the
    # peak sample itself on the wrong side of the border (off by one), and
    # could never express "no sample below the border" (k == 0).
    prefix_sum = np.concatenate(([0], np.cumsum(sorted_class)))
    return int(np.argmax(prefix_sum))


def build_estimates(sorted_class, border):
    """Return ``(estimate_a, estimate_b)`` for a given border.

    Estimate A labels the first ``border`` samples ``-1`` and the rest ``+1``;
    Estimate B is the mirrored labelling. Both arrays have the shape and
    dtype of ``sorted_class``.
    """
    below = np.arange(sorted_class.shape[0]) < border
    estimate_a = np.where(below, -1, +1).astype(sorted_class.dtype)
    estimate_b = np.where(below, +1, -1).astype(sorted_class.dtype)
    return estimate_a, estimate_b


def roc_estimate(data_data, data_class):
    """Sort the samples, place the border and pick the better labelling.

    Parameters
    ----------
    data_data : 1-D array of sample values.
    data_class : 1-D array of true labels (``-1`` or ``+1``), same length.

    Returns
    -------
    tuple
        ``(sorted_data, sorted_class, estimate_a, estimate_b, chosen,
        performance)`` where ``chosen`` is ``"A"`` or ``"B"`` and
        ``performance`` is the percentage of correctly labelled samples for
        the chosen estimate.

    NOTE: both estimates share the single border found by ``find_border``,
    which is the best border for the "+1 below / -1 above" labelling
    (Estimate B); Estimate A is only its mirror image.
    """
    order = np.argsort(data_data)
    sorted_data = data_data[order]
    sorted_class = data_class[order]

    border = find_border(sorted_class)
    estimate_a, estimate_b = build_estimates(sorted_class, border)

    correct_a = (sorted_class == estimate_a).sum()
    correct_b = (sorted_class == estimate_b).sum()

    # Ties go to Estimate B, as in the original script.
    if correct_a > correct_b:
        chosen, estimate = "A", estimate_a
    else:
        chosen, estimate = "B", estimate_b

    performance = 100.0 * (sorted_class == estimate).sum() / sorted_class.shape[0]
    return sorted_data, sorted_class, estimate_a, estimate_b, chosen, performance


def plot_estimates(sorted_data, rows):
    """Draw one subplot per ``(title, labels)`` pair in ``rows``.

    Samples labelled ``-1`` are drawn as cyan stars and samples labelled
    ``+1`` as magenta dots, all on a horizontal line at y = 0.
    """
    # Imported here so the estimation helpers above can be imported and
    # tested without matplotlib being installed.
    import matplotlib.pyplot as plt

    for position, (title, labels) in enumerate(rows, start=1):
        plt.subplot(len(rows), 1, position)
        is_minus = labels == -1
        is_plus = labels == +1

        plt.plot(sorted_data[is_minus], np.zeros(is_minus.sum()), "c*")
        plt.plot(sorted_data[is_plus], np.zeros(is_plus.sum()), "m.")
        plt.yticks([])
        plt.title(title)

    # The x label belongs to the last (bottom) subplot only.
    plt.xlabel("Data Value")
    plt.show()


def main():
    """Generate the two-class toy data, print the result and plot it."""
    rng = np.random.default_rng(1)

    # Draw order (a_x first, then b_x) is kept so the seeded data is unchanged.
    a_x = rng.normal(1.5, 1.0, size=5000)
    b_x = rng.normal(0.0, 1.0, size=5000)

    data_data = np.concatenate([a_x, b_x])
    data_class = np.concatenate([np.full_like(a_x, -1), np.full_like(b_x, +1)])

    (
        sorted_data,
        sorted_class,
        estimate_a,
        estimate_b,
        chosen,
        performance,
    ) = roc_estimate(data_data, data_class)

    print(f"We will use: Estimate {chosen}")
    print(f"Performance: {performance}% correct")

    plot_estimates(
        sorted_data,
        [
            ("Data", sorted_class),
            ("Estimate A", estimate_a),
            ("Estimate B", estimate_b),
        ],
    )


if __name__ == "__main__":
    main()