-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathps1.py
More file actions
92 lines (75 loc) · 2.66 KB
/
ps1.py
File metadata and controls
92 lines (75 loc) · 2.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from scipy.spatial.distance import cdist
import cv2
import numpy as np
import sys
import matplotlib.pyplot as plt
import random
# numpix,colors = (36,3)
# K = 3
# iters = 3
# # 6x6 pixel photo, 3 color density
# # set each pixel intensity value to a random value
# # make our input matrix
def getDist(x1, x2):
    """Return the Euclidean distance between two points.

    Both arguments are converted to float arrays before subtracting, so
    plain Python sequences (lists, tuples) are accepted as well as NumPy
    arrays, and narrow unsigned integer dtypes such as ``uint8`` cannot
    wrap around during the subtraction or squaring.

    Args:
        x1: First point, any array-like of numbers.
        x2: Second point, broadcast-compatible with ``x1``.

    Returns:
        The distance as a NumPy float scalar.
    """
    delta = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return np.sqrt(np.sum(delta ** 2))
def assignCenters(X, centroids, clusters, K):
    """Group sample indices by the centroid each sample is nearest to.

    Args:
        X: Iterable of samples.
        centroids: Current centroids, forwarded to ``calcCenters``.
        clusters: Ignored; a fresh set of ``K`` empty groups is built on
            every call. Kept only so the call signature stays the same.
        K: Number of clusters (length of the returned list).

    Returns:
        A list of ``K`` lists; list ``k`` holds the positions in ``X`` of
        the samples whose nearest centroid is centroid ``k``.
    """
    grouped = [list() for _ in range(K)]
    for position, sample in enumerate(X):
        # calcCenters yields the index of the nearest centroid.
        grouped[calcCenters(sample, centroids)].append(position)
    return grouped
def calcCenters(sample, centroids):
    """Return the index of the centroid closest to ``sample``.

    Note that, despite its name, this function does not compute new
    centroids: it measures the distance from ``sample`` to every entry of
    ``centroids`` with ``getDist`` and returns the position of the
    smallest one (the first such position when there are ties).
    """
    gaps = []
    for center in centroids:
        gaps.append(getDist(sample, center))
    return np.argmin(gaps)
def getIDS(clusters, numpix):
    """Turn per-cluster index lists into one label per sample.

    Args:
        clusters: Sequence of index collections; entry ``k`` lists the
            sample positions that belong to cluster ``k``.
        numpix: Total number of samples (length of the returned array).

    Returns:
        A float array of length ``numpix`` where element ``i`` is the
        number of the cluster containing sample ``i``. Positions that
        appear in no cluster are left uninitialised (``np.empty``).
    """
    ids = np.empty(numpix)
    # Flatten to (position, label) pairs, preserving the original visit
    # order so a position listed twice ends up with the later label.
    assignments = (
        (position, label)
        for label, members in enumerate(clusters)
        for position in members
    )
    for position, label in assignments:
        ids[position] = label
    return ids
def _is_converged(centroids_old, centroids, K):
    """Report whether none of the first ``K`` centroids has moved.

    Sums the ``getDist`` distance between each old centroid and its
    replacement, for all ``K`` centroids, and returns whether that total
    is exactly zero.
    """
    total_shift = 0
    for k in range(K):
        total_shift += getDist(centroids_old[k], centroids[k])
    return total_shift == 0
def plot(X, clusters, centroids):
    """Scatter-plot each cluster's samples and mark the centroids.

    Args:
        X: Samples, one row per sample. May be a NumPy array or a plain
            list of lists.
        clusters: Sequence of index lists; each list selects the rows of
            ``X`` belonging to one cluster and is drawn as its own series.
        centroids: Iterable of centroid coordinates, drawn as black "x"
            markers.

    Opens a matplotlib figure and blocks in ``plt.show()``.
    """
    # A plain list cannot be indexed with a list of positions, so go
    # through an array view (a no-op when X already is an ndarray).
    points = np.asarray(X)

    _, ax = plt.subplots(figsize=(12, 8))

    for members in clusters:
        # Transpose so each feature column becomes one positional argument.
        # NOTE(review): with more than two features the extra columns are
        # passed positionally too (the third lands on scatter's marker-size
        # parameter) -- confirm that is the intended rendering.
        ax.scatter(*points[members].T)

    for center in centroids:
        ax.scatter(*center, marker="x", color="black", linewidth=2)

    plt.show()
# Seed both generators so runs are reproducible: the pixel data below comes
# from the stdlib ``random`` module, the initial centroid picks from NumPy.
random.seed(42)
np.random.seed(42)

numpix, colors = (16, 3)
K = 3
iters = 5
steps = True

# One row per pixel, one column per colour value. Stored as a float array
# (not a list of lists) so rows can be subtracted, averaged and selected
# with a list of indices.
X = np.array(
    [random.sample(range(0, 255), colors) for _ in range(numpix)],
    dtype=float,
)

# list of sample indices for each cluster
clusters = [[] for _ in range(K)]

# initialize: use K distinct, randomly chosen samples as starting centroids
means = np.random.choice(numpix, K, replace=False)
centroids = [X[idx] for idx in means]

# Optimize clusters
for _ in range(iters):
    # Assign samples to closest centroids (create clusters)
    clusters = assignCenters(X, centroids, clusters, K)
    if steps:
        plot(X, clusters, centroids)

    # Calculate new centroids from the clusters: each centroid becomes the
    # mean of its members. An empty cluster keeps its previous centroid so
    # the mean of zero rows (NaN) never enters the distance computations.
    centroids_old = centroids
    centroids = [
        X[members].mean(axis=0) if members else centroids_old[idx]
        for idx, members in enumerate(clusters)
    ]

    # check if clusters have changed
    if _is_converged(centroids_old, centroids, K):
        break

    if steps:
        plot(X, clusters, centroids)

# Classify samples as the index of their clusters
ids = getIDS(clusters, numpix)
print(ids)