-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathelbow_and_cluster_plot.py
More file actions
51 lines (41 loc) · 1.62 KB
/
elbow_and_cluster_plot.py
File metadata and controls
51 lines (41 loc) · 1.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import ast

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
# Load the embeddings from the CSV file
embedding_df = pd.read_csv(r'C:\Users\user\Desktop\failure_embeddings.csv')  # Replace with the actual path to your embeddings CSV file

# Sidebar for selecting the number of clusters (K)
k_value = st.sidebar.slider("Select the number of clusters (K)", 1, 25, 5)

# Get the embeddings. Each cell of the 'vector' column is text that is parsed
# into a Python object (presumably a list of floats -- confirm against the
# script that wrote the CSV).
# SECURITY: this used to call eval() on every cell, which executes arbitrary
# code found in the file. ast.literal_eval only accepts Python literals
# (lists, tuples, numbers, strings, ...) and raises ValueError/SyntaxError on
# anything else.
embeddings = embedding_df['vector'].apply(ast.literal_eval).tolist()

# Nothing can be clustered or plotted without data; stop this run with a
# readable message instead of a scikit-learn traceback.
n_samples = len(embeddings)
if n_samples == 0:
    st.error("No embeddings found in the CSV file.")
    st.stop()

# KMeans raises ValueError when n_clusters exceeds the number of samples, so
# clamp the slider value for small datasets and tell the user about it.
if k_value > n_samples:
    st.sidebar.warning(
        f"Only {n_samples} embeddings available; using K={n_samples}."
    )
    k_value = n_samples

# Perform K-means clustering and store each row's cluster label
kmeans = KMeans(n_clusters=k_value, random_state=42)
embedding_df['cluster'] = kmeans.fit_predict(embeddings)
# Display the Elbow Plot: K-means inertia for every candidate K, so the user
# can look for the "elbow" where adding clusters stops paying off.
st.subheader("Elbow Plot")

# KMeans cannot fit more clusters than there are samples, so cap the sweep at
# the dataset size (upper bound of 25 matches the sidebar slider).
elbow_k_values = range(1, min(25, len(embeddings)) + 1)

# Fit one throwaway model per K. The models are deliberately not bound to
# `kmeans`, so the model fitted for the user-selected K is left intact.
inertia_values = [
    KMeans(n_clusters=k, random_state=42).fit(embeddings).inertia_
    for k in elbow_k_values
]

fig, ax = plt.subplots()
ax.plot(elbow_k_values, inertia_values, marker='o')
ax.set_xlabel('Number of Clusters (K)')
ax.set_ylabel('Inertia')
st.pyplot(fig)
# pyplot keeps every figure alive until it is closed; the script re-executes
# on each widget interaction, so close the figure once it has been rendered.
plt.close(fig)
# Display the Cluster Plot
st.subheader("Cluster Plot")

# Reduce dimensionality with PCA for visualization only; the cluster labels in
# embedding_df['cluster'] were computed on the full-length embeddings.
# random_state is fixed (same seed as KMeans) so that the projection is
# reproducible across reruns if scikit-learn picks a randomized SVD solver;
# it has no effect on the exact solvers.
pca = PCA(n_components=2, random_state=42)
pca_result = pca.fit_transform(embeddings)
embedding_df['pca1'] = pca_result[:, 0]
embedding_df['pca2'] = pca_result[:, 1]

# Scatter plot of the 2-D projection, coloured by cluster label
fig, ax = plt.subplots()
sns.scatterplot(x='pca1', y='pca2', hue='cluster', data=embedding_df, palette='viridis', ax=ax)
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
ax.set_title(f'K-means Clustering (K={k_value})')
st.pyplot(fig)
# pyplot keeps every figure alive until it is closed; the script re-executes
# on each widget interaction, so close the figure once it has been rendered.
plt.close(fig)