-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathasync_operations.py
More file actions
278 lines (218 loc) · 9.07 KB
/
async_operations.py
File metadata and controls
278 lines (218 loc) · 9.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
#!/usr/bin/env python3
"""
Scintirete SDK 异步操作示例
演示如何使用 Scintirete Python SDK 进行异步操作和并发处理。
"""
import asyncio
import random
import time
from typing import List
from scintirete_sdk import (
ScintireteAsyncClient,
DistanceMetric,
HnswConfig,
Vector,
ScintireteError,
)
def generate_random_vector(dimension: int) -> List[float]:
    """Return a list of ``dimension`` floats, each drawn from ``random.random()``.

    A ``dimension`` of zero or less yields an empty list.
    """
    components: List[float] = []
    for _ in range(dimension):
        components.append(random.random())
    return components
async def create_test_data(client: ScintireteAsyncClient, db_name: str, collection_name: str, count: int = 100) -> List[int]:
    """Build ``count`` random 128-element vectors and insert them in one call.

    Every vector carries this metadata:
      * ``batch``      - the vector's position integer-divided by 10
      * ``index``      - the vector's position
      * ``category``   - ``type_<position % 5>``
      * ``created_at`` - ``time.time()`` sampled when the vector is built

    Returns the id list handed back by ``client.insert_vectors``.
    """
    print(f"🔧 创建 {count} 个测试向量...")

    def build_vector(position: int):
        # One vector per position; elements are generated before the metadata.
        return Vector(
            elements=generate_random_vector(128),
            metadata={
                "batch": position // 10,
                "index": position,
                "category": f"type_{position % 5}",
                "created_at": time.time(),
            },
        )

    payload = [build_vector(position) for position in range(count)]

    # Single bulk insert rather than one round trip per vector.
    new_ids, new_count = await client.insert_vectors(
        db_name=db_name,
        collection_name=collection_name,
        vectors=payload,
    )
    print(f"✅ 插入了 {new_count} 个向量")
    return new_ids
async def concurrent_search_demo(client: ScintireteAsyncClient, db_name: str, collection_name: str):
    """Run ten top-5 searches concurrently and print per-query and total timings.

    Returns the ``asyncio.gather`` result: one ``(query_id, results, duration)``
    tuple per query, in the order the queries were generated.
    """
    print("\n🚀 并发搜索演示")
    print("=" * 50)

    # Ten independent random query vectors.
    queries = [generate_random_vector(128) for _ in range(10)]

    async def search_task(query_id: int, query_vector: List[float]):
        """Time a single search and report how many hits came back."""
        began = time.time()
        hits = await client.search(
            db_name=db_name,
            collection_name=collection_name,
            query_vector=query_vector,
            top_k=5,
        )
        elapsed = time.time() - began
        print(f" 查询 {query_id}: 找到 {len(hits)} 个结果,耗时 {elapsed:.3f}s")
        return query_id, hits, elapsed

    # Fan out every search at once and wait for all of them.
    print("执行 10 个并发搜索查询...")
    wall_start = time.time()
    pending = [search_task(number, query) for number, query in enumerate(queries)]
    outcomes = await asyncio.gather(*pending)
    wall_elapsed = time.time() - wall_start

    print(f"✅ 所有查询完成,总耗时: {wall_elapsed:.3f}s")
    print(f"平均每个查询耗时: {wall_elapsed / len(queries):.3f}s")
    return outcomes
async def batch_operations_demo(client: ScintireteAsyncClient, db_name: str):
    """Concurrently create five collections in ``db_name``, then concurrently drop them.

    The collections are named ``batch_collection_0`` .. ``batch_collection_4``
    and use the cosine metric with ``HnswConfig(m=16, ef_construction=200)``.
    Only collections whose creation succeeded are dropped afterwards.
    Per-collection failures are printed and do not abort the demo.
    """
    print("\n📦 批处理操作演示")
    print("=" * 50)
    collection_names = [f"batch_collection_{i}" for i in range(5)]

    async def create_collection_task(collection_name: str):
        """Create one collection; return ``(name, succeeded)``."""
        try:
            await client.create_collection(
                db_name=db_name,
                collection_name=collection_name,
                metric_type=DistanceMetric.COSINE,
                hnsw_config=HnswConfig(m=16, ef_construction=200),
            )
            print(f" ✅ 创建集合: {collection_name}")
            return collection_name, True
        except Exception as e:
            print(f" ❌ 创建集合失败 {collection_name}: {e}")
            return collection_name, False

    # Create all collections concurrently.
    print("并发创建 5 个集合...")
    tasks = [create_collection_task(name) for name in collection_names]
    results = await asyncio.gather(*tasks, return_exceptions=True)

    # With return_exceptions=True an entry can be an exception object instead
    # of a (name, succeeded) tuple -- e.g. CancelledError is not an Exception,
    # so it bypasses the task's own handler. Skip such entries rather than
    # crash while unpacking them.
    successful_collections = [
        outcome[0]
        for outcome in results
        if not isinstance(outcome, BaseException) and outcome[1]
    ]
    print(f"成功创建了 {len(successful_collections)} 个集合")

    async def drop_collection_task(collection_name: str):
        """Drop one collection; return ``(name, succeeded)``."""
        try:
            success, dropped_vectors = await client.drop_collection(
                db_name=db_name,
                collection_name=collection_name,
            )
            print(f" 🗑️ 删除集合: {collection_name} (删除了 {dropped_vectors} 个向量)")
            return collection_name, success
        except Exception as e:
            print(f" ❌ 删除集合失败 {collection_name}: {e}")
            return collection_name, False

    # Drop the successfully created collections concurrently.
    print("\n并发删除集合...")
    tasks = [drop_collection_task(name) for name in successful_collections]
    await asyncio.gather(*tasks, return_exceptions=True)
async def performance_comparison_demo(client: ScintireteAsyncClient, db_name: str, collection_name: str):
    """Time 20 top-5 searches run sequentially, then the same 20 run concurrently.

    Prints both durations and the sequential/concurrent speed-up ratio.
    Durations are measured with ``time.perf_counter()``, a monotonic
    high-resolution clock meant for interval timing. If the concurrent phase
    still measures as zero, the ratio is reported as not computable instead of
    raising ``ZeroDivisionError``.
    """
    print("\n⚡ 性能对比演示")
    print("=" * 50)
    query_vectors = [generate_random_vector(128) for _ in range(20)]

    async def search_single(query_vector: List[float]):
        return await client.search(
            db_name=db_name,
            collection_name=collection_name,
            query_vector=query_vector,
            top_k=5,
        )

    # Sequential: each search starts only after the previous one finished.
    print("执行顺序搜索...")
    start_time = time.perf_counter()
    for query_vector in query_vectors:
        await search_single(query_vector)
    sequential_duration = time.perf_counter() - start_time
    print(f"顺序搜索耗时: {sequential_duration:.3f}s")

    # Concurrent: all searches are in flight at the same time.
    print("执行并发搜索...")
    start_time = time.perf_counter()
    tasks = [search_single(query_vector) for query_vector in query_vectors]
    await asyncio.gather(*tasks)
    concurrent_duration = time.perf_counter() - start_time
    print(f"并发搜索耗时: {concurrent_duration:.3f}s")

    # Speed-up ratio; guard against a zero-length concurrent measurement.
    if concurrent_duration > 0:
        speedup = sequential_duration / concurrent_duration
        print(f"性能提升: {speedup:.2f}x")
    else:
        print("性能提升: 无法计算(并发耗时过短)")
async def error_handling_demo(client: ScintireteAsyncClient):
    """Trigger two failing requests on purpose and print the errors they raise.

    1. List the collections of the database name ``non_existent_db``.
    2. Call ``create_database("example_async_db")`` twice in a row.

    A ``ScintireteError`` is reported as the expected error in both cases; in
    the second case any other ``Exception`` is printed as well. Nothing is
    re-raised for those exception types.
    """
    print("\n🚨 错误处理演示")
    print("=" * 50)

    # Case 1: access a database that is not supposed to exist.
    missing_db = "non_existent_db"
    try:
        print("尝试访问不存在的数据库...")
        await client.list_collections(missing_db)
    except ScintireteError as missing_err:
        print(f" 捕获到预期错误: {missing_err}")

    # Case 2: create the same database twice; the first call may already fail
    # if the database exists, in which case the second call is never made.
    duplicate_db = "example_async_db"
    try:
        print("尝试重复创建数据库...")
        for _ in range(2):
            await client.create_database(duplicate_db)
    except ScintireteError as duplicate_err:
        print(f" 捕获到预期错误: {duplicate_err}")
    except Exception as other_err:
        print(f" 数据库可能已存在或创建成功: {other_err}")
async def main():
    """Connect to the server, build a test database, run every demo, then clean up.

    Steps: drop any leftover ``example_async_db``, create the database and one
    cosine/HNSW collection, insert 100 test vectors, sleep two seconds, run the
    four demos, and finally drop the database.

    A ``ScintireteError`` is printed and swallowed; any other ``Exception`` is
    printed and re-raised.
    """
    # Connection settings.
    SERVER_ADDRESS = "localhost:50051"
    PASSWORD = None  # set this if the server requires a password
    # Names of the throwaway test database and collection.
    DB_NAME = "example_async_db"
    COLLECTION_NAME = "example_async_collection"
    try:
        print("🔗 连接到 Scintirete 服务器(异步模式)...")
        async with ScintireteAsyncClient(SERVER_ADDRESS, password=PASSWORD) as client:
            print("✅ 异步连接成功!")

            # Prepare the test environment.
            print("\n🏗️ 准备测试环境")
            print("=" * 50)

            # Best-effort removal of a database left over from an earlier run.
            # Only SDK errors (e.g. the database does not exist) are ignored;
            # a bare ``except`` would also swallow KeyboardInterrupt and task
            # cancellation.
            try:
                await client.drop_database(DB_NAME)
                print(f"删除已存在的数据库: {DB_NAME}")
            except ScintireteError:
                pass

            success = await client.create_database(DB_NAME)
            print(f"创建数据库: {DB_NAME} - {success}")

            # Create the collection used by the search demos.
            collection_info = await client.create_collection(
                db_name=DB_NAME,
                collection_name=COLLECTION_NAME,
                metric_type=DistanceMetric.COSINE,
                hnsw_config=HnswConfig(m=16, ef_construction=200),
            )
            print(f"创建集合: {collection_info.name}")

            # Insert the test vectors; the returned ids are not needed here.
            await create_test_data(client, DB_NAME, COLLECTION_NAME, 100)

            # Fixed pause before searching, to give the index time to build.
            print("⏳ 等待索引构建完成...")
            await asyncio.sleep(2)

            # Run the individual demos.
            await concurrent_search_demo(client, DB_NAME, COLLECTION_NAME)
            await batch_operations_demo(client, DB_NAME)
            await performance_comparison_demo(client, DB_NAME, COLLECTION_NAME)
            await error_handling_demo(client)

            # Clean up.
            print("\n🧹 清理测试数据...")
            await client.drop_database(DB_NAME)
            print("测试环境清理完成")

        print("\n🎉 异步操作演示完成!")
    except ScintireteError as e:
        print(f"❌ Scintirete 错误: {e}")
    except Exception as e:
        print(f"❌ 未预期错误: {e}")
        raise
if __name__ == "__main__":
    # Script entry point: drive the async main() to completion on a new event loop.
    entry_point = main()
    asyncio.run(entry_point)