-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsocial_network_analysis.py
More file actions
1668 lines (1320 loc) · 60.1 KB
/
social_network_analysis.py
File metadata and controls
1668 lines (1320 loc) · 60.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
社交媒体网络分析系统
Social Network Analysis System
"""
import heapq
from collections import defaultdict, deque
import random
class SocialNetwork:
"""
社交网络分析系统类
Social Network Analysis System Class
该类实现了社交媒体平台的网络分析功能,包括影响力传播、社区发现、最短路径计算等功能。
This class implements network analysis functions for social media platforms,
including influence propagation, community detection, shortest path calculation, etc.
图存储结构分析 Graph Storage Structure Analysis:
1. 邻接表 (Adjacency List):
- 空间复杂度 Space Complexity: O(V + E),其中V是顶点数,E是边数
- 查找邻居时间复杂度 Time Complexity for finding neighbors: O(degree of vertex)
- 添加边时间复杂度 Time Complexity for adding edge: O(1) 平均
- 适用于稀疏图 Suitable for sparse graphs
2. 邻接矩阵 (Adjacency Matrix):
- 空间复杂度 Space Complexity: O(V^2)
- 查找邻居时间复杂度 Time Complexity for finding neighbors: O(V)
- 添加边时间复杂度 Time Complexity for adding edge: O(1)
- 适用于稠密图 Suitable for dense graphs
"""
def __init__(self, n, directed=True, storage_type="adj_list"):
"""
初始化图结构
Initialize graph structure
Args:
n (int): 节点数量 Number of nodes
directed (bool): 是否是有向图 Whether the graph is directed
storage_type (str): 存储类型 ("adj_list" 或 "adj_matrix") Storage type
"""
self.n = n # 用户数量 Number of users
self.directed = directed # 是否有向 Whether directed
self.storage_type = storage_type # 存储类型 Storage type
# 存储用户属性的数据结构 Data structures for user attributes
self.user_attributes = {}
for i in range(n):
self.user_attributes[i] = {
'id': i,
'followers': 0, # 粉丝数 Number of followers
'posts': 0, # 发帖数 Number of posts
'label': i # 标签 Label (for community detection)
}
# 根据存储类型初始化图结构 Initialize graph structure based on storage type
if storage_type == "adj_list":
# 邻接表 Adjacency list
self.graph = defaultdict(list)
self.in_edges = defaultdict(list) # 反向边,用于计算粉丝列表 Reverse edges for calculating followers
elif storage_type == "adj_matrix":
# 邻接矩阵 Adjacency matrix
self.graph = [[0 for _ in range(n)] for _ in range(n)]
self.in_edges = None # 邻接矩阵不需要反向边结构 No reverse edge structure needed for adjacency matrix
else:
raise ValueError("存储类型必须是 'adj_list' 或 'adj_matrix'")
def add_edge(self, u, v):
"""
添加关注关系
Add following relationship
Args:
u (int): 关注者 Follower
v (int): 被关注者 Followee
"""
if self.storage_type == "adj_list":
# 在邻接表中添加边 Add edge to adjacency list
if v not in self.graph[u]:
self.graph[u].append(v)
if self.directed:
# 对于有向图,v的粉丝数加1 For directed graph, increment v's follower count
self.user_attributes[v]['followers'] += 1
# 同时记录反向边 Also record reverse edge
if u not in self.in_edges[v]:
self.in_edges[v].append(u)
else: # adj_matrix
# 在邻接矩阵中添加边 Add edge to adjacency matrix
self.graph[u][v] = 1
if self.directed:
# 对于有向图,v的粉丝数加1 For directed graph, increment v's follower count
self.user_attributes[v]['followers'] += 1
def remove_edge(self, u, v):
"""
移除关注关系
Remove following relationship
Args:
u (int): 关注者 Follower
v (int): 被关注者 Followee
"""
if self.storage_type == "adj_list":
# 从邻接表中移除边 Remove edge from adjacency list
if v in self.graph[u]:
self.graph[u].remove(v)
if self.directed:
# 更新粉丝数 Update follower count
self.user_attributes[v]['followers'] -= 1
if u in self.in_edges[v]:
self.in_edges[v].remove(u)
else: # adj_matrix
# 从邻接矩阵中移除边 Remove edge from adjacency matrix
self.graph[u][v] = 0
if self.directed:
# 更新粉丝数 Update follower count
self.user_attributes[v]['followers'] -= 1
def has_edge(self, u, v):
"""
检查是否存在边
Check if edge exists
Args:
u (int): 起始节点 Start node
v (int): 目标节点 Target node
Returns:
bool: 是否存在边 Whether edge exists
"""
if self.storage_type == "adj_list":
return v in self.graph[u]
else: # adj_matrix
return self.graph[u][v] == 1
def get_followers(self, user):
"""
获取指定用户的粉丝列表
Get followers list for a specific user
Args:
user (int): 用户ID User ID
Returns:
list: 粉丝列表 Followers list
"""
if self.storage_type == "adj_list":
return self.in_edges[user][:]
else: # adj_matrix
# 在邻接矩阵中查找所有指向该用户的节点 Find all nodes pointing to this user in adjacency matrix
followers = []
for i in range(self.n):
if self.graph[i][user] == 1:
followers.append(i)
return followers
def get_following(self, user):
"""
获取指定用户的关注列表
Get following list for a specific user
Args:
user (int): 用户ID User ID
Returns:
list: 关注列表 Following list
"""
if self.storage_type == "adj_list":
return self.graph[user][:]
else: # adj_matrix
# 在邻接矩阵中查找该用户指向的所有节点 Find all nodes this user points to in adjacency matrix
following = []
for i in range(self.n):
if self.graph[user][i] == 1:
following.append(i)
return following
def get_degree(self, user):
"""
获取用户的总度数(关注数+粉丝数)
Get total degree of user (following + followers)
Args:
user (int): 用户ID User ID
Returns:
int: 总度数 Total degree
"""
following = len(self.get_following(user))
followers = self.user_attributes[user]['followers']
return following + followers
def get_node_density(self):
"""
计算图的密度
Calculate graph density
Returns:
float: 图的密度 Graph density
"""
total_possible_edges = self.n * (self.n - 1) if self.directed else self.n * (self.n - 1) // 2
if total_possible_edges == 0:
return 0.0
if self.storage_type == "adj_list":
actual_edges = sum(len(self.graph[node]) for node in range(self.n))
else: # adj_matrix
actual_edges = sum(sum(row) for row in self.graph)
return actual_edges / total_possible_edges
def k_step_influence(self, start, k):
"""
计算k步内影响力传播范围
Calculate influence spread within k steps
Args:
start (int): 起始用户ID Start user ID
k (int): 传播步数 Number of steps
Returns:
int: k步内能影响的用户数 Number of users influenced within k steps
list: 受影响的用户列表 List of influenced users
"""
# 使用广度优先搜索实现 k 步影响力传播
# Implement k-step influence propagation using breadth-first search
visited = set() # 记录访问过的节点 Track visited nodes
queue = deque([(start, 0)]) # 队列保存 (节点, 步数) Queue stores (node, step)
visited.add(start)
influenced_users = [start] # 受影响的用户列表 List of influenced users
while queue:
current_node, current_step = queue.popleft()
# 如果当前步数达到k,则停止传播 If current step reaches k, stop propagation
if current_step >= k:
continue
# 获取当前节点的邻居 Get neighbors of current node
neighbors = self.get_following(current_node)
for neighbor in neighbors:
if neighbor not in visited:
visited.add(neighbor)
queue.append((neighbor, current_step + 1))
influenced_users.append(neighbor)
return len(influenced_users), influenced_users
def bfs_shortest_path(self, start, end):
"""
使用BFS计算两点间的最短路径
Calculate shortest path between two points using BFS
Args:
start (int): 起始节点 Start node
end (int): 结束节点 End node
Returns:
list: 最短路径节点列表 Shortest path node list
"""
if start == end:
return [start]
visited = set()
queue = deque([(start, [start])])
visited.add(start)
while queue:
current_node, path = queue.popleft()
for neighbor in self.get_following(current_node):
if neighbor == end:
return path + [neighbor]
if neighbor not in visited:
visited.add(neighbor)
queue.append((neighbor, path + [neighbor]))
return [] # 无路径 No path
def select_seed_users_greedy(self, k_steps, target_coverage, max_seeds=None):
"""
使用贪心算法选择种子用户以最大化影响力覆盖
Use greedy algorithm to select seed users to maximize influence coverage
Args:
k_steps (int): 传播步数 Number of propagation steps
target_coverage (int): 目标覆盖用户数 Target number of users to cover
max_seeds (int): 最大种子用户数 Maximum number of seed users (optional)
Returns:
list: 选择的种子用户列表 Selected seed users list
"""
if max_seeds is None:
max_seeds = self.n # 默认最多选择所有节点 Default to selecting all nodes if needed
selected_seeds = []
covered_nodes = set()
# 贪心算法:每一步选择能覆盖最多未覆盖节点的用户
# Greedy algorithm: At each step, select the user that covers the most uncovered nodes
for _ in range(max_seeds):
if len(covered_nodes) >= target_coverage:
break
best_user = None
best_coverage = -1
# 遍历所有可能的种子用户 Iterate through all possible seed users
for user in range(self.n):
# 如果用户已经在种子列表中,跳过 Skip if user is already in seed list
if user in selected_seeds:
continue
# 计算该用户k步内的影响力 Calculate influence of this user within k steps
_, influenced_users = self.k_step_influence(user, k_steps)
# 计算该用户能新增覆盖的节点数 Calculate additional nodes this user can cover
newly_covered = set(influenced_users) - covered_nodes
new_coverage = len(newly_covered)
# 更新最佳用户 Update best user if this one covers more
if new_coverage > best_coverage:
best_coverage = new_coverage
best_user = user
# 如果没有找到能增加覆盖的用户,结束 If no user increases coverage, end
if best_user is None or best_coverage == 0:
break
# 将最佳用户加入种子列表 Add best user to seed list
selected_seeds.append(best_user)
# 更新已覆盖节点 Update covered nodes
_, influenced_users = self.k_step_influence(best_user, k_steps)
covered_nodes.update(influenced_users)
return selected_seeds
def community_detection(self, algorithm='lpa', iterations=100):
"""
社区发现算法
Community Detection Algorithm
Args:
algorithm (str): 算法类型 ('lpa' for Label Propagation Algorithm)
iterations (int): 最大迭代次数 Maximum number of iterations
Returns:
dict: 社区划分结果,键为社区标签,值为属于该社区的节点列表
Community partitioning result, key is community label, value is list of nodes in that community
"""
if algorithm == 'lpa':
return self._label_propagation_algorithm(iterations)
else:
raise ValueError(f"不支持的算法类型: {algorithm}")
def _label_propagation_algorithm(self, iterations):
"""
基于标签传播的社区发现算法
Label Propagation Algorithm for Community Detection
Args:
iterations (int): 最大迭代次数 Maximum number of iterations
Returns:
dict: 社区划分结果 Community partitioning result
"""
# 初始化每个节点的标签 Initialize labels for each node
labels = {i: i for i in range(self.n)}
# 迭代更新标签 Iterate to update labels
for iteration in range(iterations):
# 随机打乱节点顺序 Randomly shuffle node order
nodes = list(range(self.n))
random.shuffle(nodes)
unchanged = True
for node in nodes:
# 获取邻居节点 Get neighbor nodes
neighbors = self.get_following(node)
# 统计邻居节点的标签频率 Count frequency of neighbor labels
label_counts = defaultdict(int)
for neighbor in neighbors:
label_counts[labels[neighbor]] += 1
# 如果没有邻居,跳过 If no neighbors, skip
if not label_counts:
continue
# 选择出现频率最高的标签 Select the most frequent label
max_count = max(label_counts.values())
candidates = [label for label, count in label_counts.items() if count == max_count]
# 随机选择一个标签(如果有多个最高频率的标签)Randomly select a label if multiple have max frequency
new_label = min(candidates) # 使用min确保结果一致性 Use min for consistent results
# 更新标签 Update label if different
if labels[node] != new_label:
labels[node] = new_label
unchanged = False
# 如果没有标签发生变化,提前结束 If no labels changed, end early
if unchanged:
break
# 将相同标签的节点分组 Group nodes with the same label
communities = defaultdict(list)
for node, label in labels.items():
communities[label].append(node)
return dict(communities)
def floyd_warshall_all_pairs_shortest_path(self):
"""
使用Floyd-Warshall算法计算所有节点对之间的最短路径
Calculate all-pairs shortest paths using Floyd-Warshall algorithm
Returns:
list: 二维数组,dist[i][j]表示从节点i到节点j的最短距离
2D array where dist[i][j] represents the shortest distance from node i to node j
"""
# 初始化距离矩阵 Initialize distance matrix
INF = float('inf')
dist = [[INF for _ in range(self.n)] for _ in range(self.n)]
# 设置初始距离 Set initial distances
for i in range(self.n):
dist[i][i] = 0 # 自己到自己的距离为0 Distance from node to itself is 0
# 添加直接相连的边 Add directly connected edges
for i in range(self.n):
neighbors = self.get_following(i)
for j in neighbors:
dist[i][j] = 1 # 直接相连的距离为1 Distance for directly connected nodes is 1
# Floyd-Warshall算法 Floyd-Warshall algorithm
for k in range(self.n):
for i in range(self.n):
for j in range(self.n):
if dist[i][k] != INF and dist[k][j] != INF:
dist[i][j] = min(dist[i][j], dist[i][k] + dist[k][j])
return dist
def compute_betweenness_centrality(self):
"""
计算每个节点的介数中心性(Betweenness Centrality)
Compute betweenness centrality for each node
Returns:
dict: 每个节点的介数中心性值 Dictionary of betweenness centrality values for each node
"""
betweenness = {i: 0.0 for i in range(self.n)}
# 对每个节点计算它在多少最短路径上 For each node, calculate how many shortest paths it lies on
for s in range(self.n):
# 使用BFS计算从s出发的最短路径 Use BFS to calculate shortest paths from s
stack = []
predecessors = {i: [] for i in range(self.n)}
shortest_paths_num = {i: 0 for i in range(self.n)}
distance = {i: -1 for i in range(self.n)}
queue = deque([s])
distance[s] = 0
shortest_paths_num[s] = 1
while queue:
v = queue.popleft()
stack.append(v)
# 遍历v的邻居节点 Iterate through neighbors of v
for w in self.get_following(v):
if distance[w] == -1: # w未被访问过 w hasn't been visited
queue.append(w)
distance[w] = distance[v] + 1
if distance[w] == distance[v] + 1: # w在最短路径上 w is on shortest path
shortest_paths_num[w] += shortest_paths_num[v]
predecessors[w].append(v)
# 计算依赖度 Calculate dependencies
delta = {i: 0.0 for i in range(self.n)}
# 从栈顶开始计算 Calculate from top of stack
while stack:
w = stack.pop()
for v in predecessors[w]:
delta[v] += (shortest_paths_num[v] / shortest_paths_num[w]) * (1 + delta[w])
if w != s:
betweenness[w] += delta[w]
# 标准化介数中心性 Normalize betweenness centrality
for node in betweenness:
betweenness[node] /= 2 # 因为图是有向的,但我们在计算无向图的中心性 Because the graph is directed, but we're calculating undirected graph centrality
return betweenness
def topological_sort(self):
"""
拓扑排序,用于确定任务执行顺序
Topological sort for determining task execution order
Returns:
list: 拓扑排序结果,如果存在循环依赖则返回空列表
Topologically sorted list, or empty list if circular dependency exists
"""
# 计算每个节点的入度 Calculate in-degree for each node
in_degree = [0] * self.n
for i in range(self.n):
neighbors = self.get_following(i)
for neighbor in neighbors:
in_degree[neighbor] += 1
# 找到所有入度为0的节点 Find all nodes with in-degree 0
queue = deque()
for i in range(self.n):
if in_degree[i] == 0:
queue.append(i)
topo_order = []
# 执行Kahn算法 Execute Kahn's algorithm
while queue:
node = queue.popleft()
topo_order.append(node)
# 减少邻居节点的入度 Reduce in-degree of neighbors
for neighbor in self.get_following(node):
in_degree[neighbor] -= 1
if in_degree[neighbor] == 0:
queue.append(neighbor)
# 如果拓扑排序结果不包含所有节点,说明存在循环依赖
# If topological sort doesn't include all nodes, there's a circular dependency
if len(topo_order) != self.n:
return [] # 循环依赖存在 Circular dependency exists
return topo_order
def has_cycle(self):
"""
检测图中是否存在循环依赖
Detect if there's a circular dependency in the graph
Returns:
bool: 是否存在循环依赖 Whether circular dependency exists
"""
# 使用DFS检测循环 Use DFS to detect cycles
WHITE, GRAY, BLACK = 0, 1, 2
color = [WHITE] * self.n
def dfs_visit(node):
if color[node] == GRAY:
return True # 发现后向边,存在循环 Found back edge, cycle exists
if color[node] == BLACK:
return False # 已经访问过,无循环 Already visited, no cycle
color[node] = GRAY # 标记为正在访问 Mark as being visited
# 检查所有邻居节点 Check all neighbor nodes
for neighbor in self.get_following(node):
if dfs_visit(neighbor):
return True
color[node] = BLACK # 标记为已完成访问 Mark as completely visited
return False
for i in range(self.n):
if color[i] == WHITE:
if dfs_visit(i):
return True
return False
class HashTable:
"""
哈希表实现,用于快速查询
Hash Table implementation for fast queries
哈希函数采用除法散列,冲突解决使用链地址法
Hash function uses division method, collision resolution uses chaining
"""
def __init__(self, size=1000):
"""
初始化哈希表
Initialize hash table
Args:
size (int): 哈希表大小 Hash table size
"""
self.size = size
self.table = [[] for _ in range(size)] # 使用链地址法处理冲突 Use chaining to handle collisions
self.count = 0 # 元素数量 Number of elements
def _hash(self, key):
"""
哈希函数
Hash function
Args:
key: 键 Key
Returns:
int: 哈希值 Hash value
"""
if isinstance(key, str):
# 字符串哈希:使用多项式滚动哈希 Use polynomial rolling hash for strings
hash_value = 0
for char in key:
hash_value = (hash_value * 31 + ord(char)) % self.size
return hash_value
else:
# 数字哈希:使用除法散列 Use division hashing for numbers
return hash(key) % self.size
def insert(self, key, value):
"""
插入键值对
Insert key-value pair
Args:
key: 键 Key
value: 值 Value
"""
index = self._hash(key)
bucket = self.table[index]
# 检查键是否已存在 Check if key already exists
for i, (k, v) in enumerate(bucket):
if k == key:
bucket[i] = (key, value) # 更新现有键 Update existing key
return
# 添加新的键值对 Add new key-value pair
bucket.append((key, value))
self.count += 1
def search(self, key):
"""
搜索键对应的值
Search for value corresponding to key
Args:
key: 键 Key
Returns:
值或None Value or None if not found
"""
index = self._hash(key)
bucket = self.table[index]
for k, v in bucket:
if k == key:
return v
return None # 未找到 Not found
def delete(self, key):
"""
删除键值对
Delete key-value pair
Args:
key: 键 Key
Returns:
bool: 是否删除成功 Whether deletion was successful
"""
index = self._hash(key)
bucket = self.table[index]
for i, (k, v) in enumerate(bucket):
if k == key:
del bucket[i]
self.count -= 1
return True
return False # 键不存在 Key doesn't exist
class MaxHeap:
"""
最大堆实现,用于维护Top-K排名
Max Heap implementation for maintaining Top-K rankings
"""
def __init__(self):
self.heap = []
def parent(self, i):
return (i - 1) // 2
def left_child(self, i):
return 2 * i + 1
def right_child(self, i):
return 2 * i + 2
def swap(self, i, j):
self.heap[i], self.heap[j] = self.heap[j], self.heap[i]
def insert(self, value):
"""
插入元素
Insert element
"""
self.heap.append(value)
self._heapify_up(len(self.heap) - 1)
def _heapify_up(self, i):
"""
向上调整堆
Heapify up
"""
while i != 0 and self.heap[self.parent(i)][0] < self.heap[i][0]:
self.swap(i, self.parent(i))
i = self.parent(i)
def extract_max(self):
"""
提取最大元素
Extract maximum element
"""
if not self.heap:
return None
if len(self.heap) == 1:
return self.heap.pop()
root = self.heap[0]
self.heap[0] = self.heap.pop()
self._heapify_down(0)
return root
def _heapify_down(self, i):
"""
向下调整堆
Heapify down
"""
largest = i
left = self.left_child(i)
right = self.right_child(i)
if left < len(self.heap) and self.heap[left][0] > self.heap[largest][0]:
largest = left
if right < len(self.heap) and self.heap[right][0] > self.heap[largest][0]:
largest = right
if largest != i:
self.swap(i, largest)
self._heapify_down(largest)
def peek(self):
"""
查看堆顶元素
Peek at root element
"""
return self.heap[0] if self.heap else None
def size(self):
"""
返回堆大小
Return heap size
"""
return len(self.heap)
class MinHeap:
"""
最小堆实现,用于维护Top-K排名
Min Heap implementation for maintaining Top-K rankings
"""
def __init__(self):
self.heap = []
def parent(self, i):
return (i - 1) // 2
def left_child(self, i):
return 2 * i + 1
def right_child(self, i):
return 2 * i + 2
def swap(self, i, j):
self.heap[i], self.heap[j] = self.heap[j], self.heap[i]
def insert(self, value):
"""
插入元素
Insert element
"""
self.heap.append(value)
self._heapify_up(len(self.heap) - 1)
def _heapify_up(self, i):
"""
向上调整堆
Heapify up
"""
while i != 0 and self.heap[self.parent(i)][0] > self.heap[i][0]:
self.swap(i, self.parent(i))
i = self.parent(i)
def extract_min(self):
"""
提取最小元素
Extract minimum element
"""
if not self.heap:
return None
if len(self.heap) == 1:
return self.heap.pop()
root = self.heap[0]
self.heap[0] = self.heap.pop()
self._heapify_down(0)
return root
def _heapify_down(self, i):
"""
向下调整堆
Heapify down
"""
smallest = i
left = self.left_child(i)
right = self.right_child(i)
if left < len(self.heap) and self.heap[left][0] < self.heap[smallest][0]:
smallest = left
if right < len(self.heap) and self.heap[right][0] < self.heap[smallest][0]:
smallest = right
if smallest != i:
self.swap(i, smallest)
self._heapify_down(smallest)
def peek(self):
"""
查看堆顶元素
Peek at root element
"""
return self.heap[0] if self.heap else None
def size(self):
"""
返回堆大小
Return heap size
"""
return len(self.heap)
class OptimizedSocialNetwork(SocialNetwork):
"""
优化的社交网络类,使用哈希表加速查询
Optimized Social Network class that uses hash tables to accelerate queries
"""
def __init__(self, n, directed=True, storage_type="adj_list"):
super().__init__(n, directed, storage_type)
# 创建哈希表用于快速查询 Create hash tables for fast queries
self.user_info_table = HashTable(size=max(1000, n // 10)) # 用户信息表 User info table
self.edge_query_table = HashTable(size=max(2000, n // 5)) # 边查询表 Edge query table
self.label_query_table = HashTable(size=max(1000, n // 10)) # 标签查询表 Label query table
# 初始化用户信息表 Initialize user info table
for i in range(n):
self.user_info_table.insert(i, self.user_attributes[i])
def add_edge(self, u, v):
"""
添加关注关系(重写父类方法以更新哈希表)
Add following relationship (override parent method to update hash tables)
"""
super().add_edge(u, v)
# 更新用户信息表 Update user info table
self.user_info_table.insert(u, self.user_attributes[u])
self.user_info_table.insert(v, self.user_attributes[v])
# 更新边查询表 Update edge query table
edge_key = (u, v)
self.edge_query_table.insert(edge_key, True)
def remove_edge(self, u, v):
"""
移除关注关系(重写父类方法以更新哈希表)
Remove following relationship (override parent method to update hash tables)
"""
super().remove_edge(u, v)
# 更新用户信息表 Update user info table
self.user_info_table.insert(u, self.user_attributes[u])
self.user_info_table.insert(v, self.user_attributes[v])
# 更新边查询表 Update edge query table
edge_key = (u, v)
self.edge_query_table.delete(edge_key)
def quick_user_lookup(self, user_id):
"""
快速用户信息查询
Quick user information lookup
Args:
user_id (int): 用户ID User ID
Returns:
dict: 用户信息 User information
"""
return self.user_info_table.search(user_id)
def quick_edge_check(self, u, v):
"""
快速检查两个用户之间是否存在关注关系
Quickly check if there's a following relationship between two users
Args:
u (int): 起始用户ID Start user ID
v (int): 目标用户ID Target user ID
Returns:
bool: 是否存在关注关系 Whether following relationship exists
"""
edge_key = (u, v)
result = self.edge_query_table.search(edge_key)
return result is not None
def quick_label_search(self, label):
"""
快速检索具有相同标签的用户
Quickly retrieve users with the same label
Args:
label: 标签 Label
Returns:
list: 具有相同标签的用户列表 List of users with the same label
"""
# 遍历所有用户来查找具有特定标签的用户
# Iterate through all users to find those with specific label
users_with_label = []
for user_id in range(self.n):
user_info = self.quick_user_lookup(user_id)
if user_info and user_info.get('label') == label:
users_with_label.append(user_id)
return users_with_label
def get_top_k_by_followers(self, k):
"""
获取粉丝数前K名的用户
Get top K users by number of followers
Args:
k (int): 前K名 Top K
Returns:
list: 前K名用户列表 List of top K users
"""
# 使用最小堆维护Top-K列表
# Use min heap to maintain Top-K list
min_heap = MinHeap()
for user_id in range(self.n):
followers_count = self.user_attributes[user_id]['followers']
if min_heap.size() < k:
# 堆未满,直接插入 Heap not full, insert directly
min_heap.insert((followers_count, user_id))
elif followers_count > min_heap.peek()[0]:
# 当前用户粉丝数比堆顶多,替换堆顶 Current user has more followers than heap top, replace heap top
min_heap.extract_min()
min_heap.insert((followers_count, user_id))
# 提取结果 Extract results
result = []
temp_list = []
# 将堆中元素取出并按粉丝数降序排列 Extract elements from heap and sort by followers in descending order
while min_heap.size() > 0: