-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgraph_efficiency.py
More file actions
122 lines (96 loc) · 4.3 KB
/
graph_efficiency.py
File metadata and controls
122 lines (96 loc) · 4.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import matplotlib.pyplot as plt
import numpy as np
# Load per-version timing results from the benchmark CSV.
# Row layout appears to be: version, then (label, time) field pairs —
# only the time half of each pair is kept. TODO confirm against the
# producer of performance_results.csv.
with open('output/performance_results.csv', 'r') as f:
    raw_rows = [row.strip() for row in f]
datasets = raw_rows[0].split(',')[1:]
data = {}
for row in raw_rows[1:]:
    if not row:
        continue
    fields = [field.strip() for field in row.split(',')]
    # Every second field starting at index 2 holds a time value.
    data[fields[0]] = [float(fields[i + 1]) for i in range(1, len(fields), 2)]
# Problem sizes for the three benchmark datasets: n points, k clusters,
# d dimensions (n and d vary together per dataset).
n_values = [2048, 16384, 65536]
k = 16
d_values = [16, 24, 32]
# Hardware ceilings used as the 100% reference.
peak_flops = 52e12        # FLOP/s — presumably the target GPU's peak; TODO confirm
peak_bandwidth = 736e9    # bytes/s

compute_efficiency = {}
memory_efficiency = {}
for idx, (n, d) in enumerate(zip(n_values, d_values)):
    # Work model for one k-means iteration: 3 FLOPs per point/cluster/dim
    # for the distance terms, plus per-point and per-centroid terms.
    flops = n * k * d * 3 + n * d + k * d
    # Bytes moved per iteration (8-byte doubles, 4-byte ints/floats);
    # assumes every access goes to DRAM with no reuse.
    memory_bytes = (n * d * 8 + n * k * d * 8 + n * 4
                    + n * d * 8 + n * 4 + n * d * 8
                    + k * 4 + k * d * 8 + k * d * 8)
    # Theoretical best-case times (ms) if the kernel were purely
    # compute-bound or purely bandwidth-bound.
    compute_floor_ms = flops / peak_flops * 1000
    memory_floor_ms = memory_bytes / peak_bandwidth * 1000
    for version in ('cuda_basic', 'cuda_shared', 'thrust'):
        if version in data:
            measured_ms = data[version][idx]
            # Efficiency = theoretical floor / measured, as a percentage.
            compute_efficiency.setdefault(version, []).append(
                compute_floor_ms / measured_ms * 100)
            memory_efficiency.setdefault(version, []).append(
                memory_floor_ms / measured_ms * 100)
# Grouped bar chart: compute efficiency per implementation across datasets.
x = np.arange(len(datasets))
width = 0.25
fig, ax = plt.subplots(figsize=(10, 6))
series = [
    (-width, 'cuda_basic', 'CUDA Basic'),
    (0, 'cuda_shared', 'CUDA Shared'),
    (width, 'thrust', 'Thrust'),
]
bar_groups = [
    ax.bar(x + offset, compute_efficiency[key], width, label=label)
    for offset, key, label in series
]
ax.set_ylabel('Compute Efficiency (%)', fontsize=12)
ax.set_xlabel('Dataset Size', fontsize=12)
ax.set_title('K-means Compute Efficiency\n(Theoretical Peak Compute Performance)', fontsize=12)
ax.set_xticks(x)
ax.set_xticklabels(datasets)
ax.set_ylim(0, 0.5)  # values are far below 1%, so zoom the axis in
ax.legend()
ax.grid(axis='y', alpha=0.3)
# Annotate each bar with its exact value.
for group in bar_groups:
    for rect in group:
        h = rect.get_height()
        ax.text(rect.get_x() + rect.get_width() / 2., h,
                f'{h:.2f}%',
                ha='center', va='bottom', fontsize=9)
plt.tight_layout()
plt.savefig('output/compute_efficiency_plot.png', dpi=300, bbox_inches='tight')
print("Compute efficiency plot saved to output/compute_efficiency_plot.png")
# Grouped bar chart: memory-bandwidth efficiency per implementation.
fig, ax = plt.subplots(figsize=(10, 6))
bars1 = ax.bar(x - width, memory_efficiency['cuda_basic'], width, label='CUDA Basic')
bars2 = ax.bar(x, memory_efficiency['cuda_shared'], width, label='CUDA Shared')
bars3 = ax.bar(x + width, memory_efficiency['thrust'], width, label='Thrust')
ax.set_ylabel('Memory Efficiency (%)', fontsize=12)
ax.set_xlabel('Dataset Size', fontsize=12)
ax.set_title('K-means Memory Efficiency\n(Theoretical Peak Memory Bandwidth)', fontsize=12)
ax.set_xticks(x)
ax.set_xticklabels(datasets)
# BUGFIX: draw the 100%-of-peak reference line BEFORE calling legend();
# matplotlib's legend only includes artists that exist when it is built,
# so the original order (legend first, axhline after) silently dropped
# the '100% (Peak)' legend entry.
ax.axhline(y=100, color='red', linestyle='--', linewidth=1, alpha=0.5, label='100% (Peak)')
ax.legend()
ax.grid(axis='y', alpha=0.3)
# Annotate each bar with its exact value.
for bars in [bars1, bars2, bars3]:
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height,
                f'{height:.2f}%',
                ha='center', va='bottom', fontsize=9)
plt.tight_layout()
plt.savefig('output/memory_efficiency_plot.png', dpi=300, bbox_inches='tight')
print("Memory efficiency plot saved to output/memory_efficiency_plot.png")
def _print_efficiency_table(title, table):
    # Print one fixed-width efficiency table (values already in percent).
    print(title)
    print(f"{'Version':<15} {datasets[0]:<10} {datasets[1]:<10} {datasets[2]:<10}")
    print("-" * 45)
    for ver in ('cuda_basic', 'cuda_shared', 'thrust'):
        row = table[ver]
        print(f"{ver:<15} {row[0]:<10.2f} {row[1]:<10.2f} {row[2]:<10.2f}")

# Dump both tables to stdout for quick inspection alongside the plots.
_print_efficiency_table("\nCompute Efficiency (%):", compute_efficiency)
_print_efficiency_table("\nMemory Efficiency (%):", memory_efficiency)