mirror of
https://github.com/davrot/pytutorial.git
synced 2025-04-19 05:36:42 +02:00
Update README.md
Signed-off-by: David Rotermund <54365609+davrot@users.noreply.github.com>
This commit is contained in:
parent
f99563f1c8
commit
c74c3e120c
1 changed files with 108 additions and 1 deletions
|
@ -72,7 +72,7 @@ data_b = np.concatenate((b_x, b_y), axis=1)
|
||||||
|
|
||||||
data = np.concatenate((data_a, data_b), axis=0)
|
data = np.concatenate((data_a, data_b), axis=0)
|
||||||
|
|
||||||
kmeans = KMeans(n_clusters=2)
|
kmeans = KMeans(n_clusters=2, n_init = 10)
|
||||||
kmeans.fit(data)
|
kmeans.fit(data)
|
||||||
|
|
||||||
|
|
||||||
|
@ -90,4 +90,111 @@ plt.show()
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
|
> **labels_** : ndarray of shape (n_samples,)
|
||||||
|
> Labels of each point
|
||||||
|
|
||||||
|
## Where does the algorithm "think" the data points belong?
|
||||||
|
|
||||||
|
```python
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from sklearn.cluster import KMeans
|
||||||
|
|
||||||
|
rng = np.random.default_rng(1)
|
||||||
|
|
||||||
|
a_x = rng.normal(1.5, 1.0, size=(1000))[:, np.newaxis]
|
||||||
|
a_y = rng.normal(3.0, 1.0, size=(1000))[:, np.newaxis]
|
||||||
|
data_a = np.concatenate((a_x, a_y), axis=1)
|
||||||
|
|
||||||
|
b_x = rng.normal(0.0, 1.0, size=(1000))[:, np.newaxis]
|
||||||
|
b_y = rng.normal(0.0, 1.0, size=(1000))[:, np.newaxis]
|
||||||
|
data_b = np.concatenate((b_x, b_y), axis=1)
|
||||||
|
|
||||||
|
data = np.concatenate((data_a, data_b), axis=0)
|
||||||
|
|
||||||
|
kmeans = KMeans(n_clusters=2, n_init = 10)
|
||||||
|
kmeans.fit(data)
|
||||||
|
|
||||||
|
labels = kmeans.labels_
|
||||||
|
idx_0 = np.where(labels == 0)[0]
|
||||||
|
idx_1 = np.where(labels == 1)[0]
|
||||||
|
|
||||||
|
plt.plot(data[idx_0, 0], data[idx_0, 1], "r.")
|
||||||
|
plt.plot(data[idx_1, 0], data[idx_1, 1], "b.")
|
||||||
|
plt.plot(
|
||||||
|
kmeans.cluster_centers_[0, 0], kmeans.cluster_centers_[0, 1], "k*", markersize=12
|
||||||
|
)
|
||||||
|
plt.plot(
|
||||||
|
kmeans.cluster_centers_[1, 0], kmeans.cluster_centers_[1, 1], "k*", markersize=12
|
||||||
|
)
|
||||||
|
|
||||||
|
plt.show()
|
||||||
|
```
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
|
||||||
|
## [predict](https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html#sklearn.cluster.KMeans.predict)
|
||||||
|
|
||||||
|
```python
|
||||||
|
predict(X, sample_weight='deprecated')
|
||||||
|
```
|
||||||
|
|
||||||
|
> Predict the closest cluster each sample in X belongs to.
|
||||||
|
>
|
||||||
|
> In the vector quantization literature, cluster\_centers\_ is called the code book and each value returned by predict is the index of the closest code in the code book.
|
||||||
|
|
||||||
|
```python
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from sklearn.cluster import KMeans
|
||||||
|
|
||||||
|
rng = np.random.default_rng(1)
|
||||||
|
|
||||||
|
a_x = rng.normal(1.5, 1.0, size=(1000))[:, np.newaxis]
|
||||||
|
a_y = rng.normal(3.0, 1.0, size=(1000))[:, np.newaxis]
|
||||||
|
data_a = np.concatenate((a_x, a_y), axis=1)
|
||||||
|
|
||||||
|
b_x = rng.normal(0.0, 1.0, size=(1000))[:, np.newaxis]
|
||||||
|
b_y = rng.normal(0.0, 1.0, size=(1000))[:, np.newaxis]
|
||||||
|
data_b = np.concatenate((b_x, b_y), axis=1)
|
||||||
|
|
||||||
|
data = np.concatenate((data_a, data_b), axis=0)
|
||||||
|
|
||||||
|
kmeans = KMeans(n_clusters=2, n_init=10)
|
||||||
|
kmeans.fit(data)
|
||||||
|
|
||||||
|
|
||||||
|
x = np.linspace(data[:, 0].min(), data[:, 0].max(), 100)
|
||||||
|
y = np.linspace(data[:, 1].min(), data[:, 1].max(), 100)
|
||||||
|
xx, yy = np.meshgrid(x, y)
|
||||||
|
|
||||||
|
xx_r = xx.ravel()[:, np.newaxis]
|
||||||
|
yy_r = yy.ravel()[:, np.newaxis]
|
||||||
|
|
||||||
|
print(xx.shape) # -> (100, 100)
|
||||||
|
print(xx_r.shape) # -> (10000, 1)
|
||||||
|
print(yy.shape) # -> (100, 100)
|
||||||
|
print(yy_r.shape) # -> (10000, 1)
|
||||||
|
|
||||||
|
coordinates = np.concatenate((xx_r, yy_r), axis=1)
|
||||||
|
print(coordinates.shape) # -> (10000, 2)
|
||||||
|
|
||||||
|
labels = kmeans.predict(coordinates)
|
||||||
|
idx_0 = np.where(labels == 0)[0]
|
||||||
|
idx_1 = np.where(labels == 1)[0]
|
||||||
|
|
||||||
|
|
||||||
|
plt.plot(coordinates[idx_0, 0], coordinates[idx_0, 1], "r.")
|
||||||
|
plt.plot(coordinates[idx_1, 0], coordinates[idx_1, 1], "b.")
|
||||||
|
plt.plot(
|
||||||
|
kmeans.cluster_centers_[0, 0], kmeans.cluster_centers_[0, 1], "k*", markersize=12
|
||||||
|
)
|
||||||
|
plt.plot(
|
||||||
|
kmeans.cluster_centers_[1, 0], kmeans.cluster_centers_[1, 1], "k*", markersize=12
|
||||||
|
)
|
||||||
|
|
||||||
|
plt.show()
|
||||||
|
```
|
||||||
|
|
||||||
|

|
||||||
|
|
Loading…
Add table
Reference in a new issue