From 6cc6c9e48b1985892f34844beb355f0d16c00d0b Mon Sep 17 00:00:00 2001 From: David Rotermund <54365609+davrot@users.noreply.github.com> Date: Fri, 16 Feb 2024 00:37:22 +0100 Subject: [PATCH] Update README.md Signed-off-by: David Rotermund <54365609+davrot@users.noreply.github.com> --- numpy/roc/README.md | 74 ++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 41 deletions(-) diff --git a/numpy/roc/README.md b/numpy/roc/README.md index 8dd2cf6..6f32716 100644 --- a/numpy/roc/README.md +++ b/numpy/roc/README.md @@ -60,7 +60,9 @@ a_x = rng.normal(1.5, 1.0, size=(5000)) b_x = rng.normal(0.0, 1.0, size=(5000)) data_data = np.concatenate([a_x, b_x]) -data_class = np.concatenate([np.full_like(a_x, -1), np.full_like(b_x, +1)]) +data_class = np.concatenate( + [np.full_like(a_x, -1 / a_x.shape[0]), np.full_like(b_x, +1 / b_x.shape[0])] +) idx = np.argsort(data_data) data_data = data_data[idx] @@ -76,6 +78,7 @@ plt.ylabel("Cumsum of the classes") plt.xlabel("Sorted sample id") plt.show() ``` + ![Image2](image2.png) ## How to create an estimate from the ROC cumsum maximum @@ -90,41 +93,42 @@ a_x = rng.normal(1.5, 1.0, size=(5000)) b_x = rng.normal(0.0, 1.0, size=(5000)) data_data = np.concatenate([a_x, b_x]) -data_class = np.concatenate([np.full_like(a_x, -1), np.full_like(b_x, +1)]) +data_class = np.concatenate( + [np.full_like(a_x, -1 / a_x.shape[0]), np.full_like(b_x, +1 / b_x.shape[0])] +) +data_class_id = np.concatenate([np.full_like(a_x, -1), np.full_like(b_x, +1)]) idx = np.argsort(data_data) data_data = data_data[idx] data_class = data_class[idx] +data_class_id = data_class_id[idx] data_cumsum = np.cumsum(data_class) border = np.argmax(np.abs(data_cumsum)) -variant_a = (data_class[:border] == -1).sum() + (data_class[border:] == +1).sum() - -variant_b = (data_class[:border] == +1).sum() + (data_class[border:] == -1).sum() - -estimate_a = np.concatenate( - (np.full_like(data_class[:border], -1), np.full_like(data_class[border:], +1)) -) - -estimate_b = np.concatenate( - (np.full_like(data_class[:border], +1), np.full_like(data_class[border:], -1)) -) - -if variant_a > variant_b: - print("We will use: Estimate A") - estimate = estimate_a +if data_cumsum[border] < 0: + estimate = np.concatenate( + ( + np.full_like(data_class[: border + 1], -1), + np.full_like(data_class[border + 1 :], +1), + ) + ) else: - print("We will use: Estimate B") - estimate = estimate_b + estimate = np.concatenate( + ( + np.full_like(data_class[: border + 1], +1), + np.full_like(data_class[border + 1 :], -1), + ) + ) -performance = 100.0 * (data_class == estimate).sum() / data_class.shape[0] + +performance = 100.0 * (data_class_id == estimate).sum() / data_class_id.shape[0] print(f"Performance: {performance}% correct") -plt.subplot(3, 1, 1) -idx_a = np.where(data_class == -1)[0] -idx_b = np.where(data_class == +1)[0] +plt.subplot(2, 1, 1) +idx_a = np.where(data_class < 0)[0] +idx_b = np.where(data_class > 0)[0] idx = np.arange(0, data_class.shape[0]) plt.plot(data_data[idx_a], np.zeros_like(idx_a), "c*") @@ -132,34 +136,22 @@ plt.plot(data_data[idx_b], np.zeros_like(idx_b), "m.") plt.yticks([]) plt.title("Data") -plt.subplot(3, 1, 2) -idx_a = np.where(estimate_a == -1)[0] -idx_b = np.where(estimate_a == +1)[0] -idx = np.arange(0, estimate_a.shape[0]) +plt.subplot(2, 1, 2) +idx_a = np.where(estimate < 0)[0] +idx_b = np.where(estimate > 0)[0] +idx = np.arange(0, estimate.shape[0]) plt.plot(data_data[idx_a], np.zeros_like(idx_a), "c*") plt.plot(data_data[idx_b], np.zeros_like(idx_b), "m.") plt.yticks([]) -plt.title("Estimate A") - -plt.subplot(3, 1, 3) -idx_a = np.where(estimate_b == -1)[0] -idx_b = np.where(estimate_b == +1)[0] -idx = np.arange(0, estimate_b.shape[0]) - -plt.plot(data_data[idx_a], np.zeros_like(idx_a), "c*") -plt.plot(data_data[idx_b], np.zeros_like(idx_b), "m.") -plt.yticks([]) - -plt.title("Estimate B") +plt.title("Estimate") plt.xlabel("Data Value") plt.show() ``` Output: ```python -We will use: Estimate B -Performance: 77.3% correct +Performance: 77.31% correct ``` ![Image3](image3.png)