Skip to content

Commit 82d0a8a

Browse files
improve index management in rollback.c
1 parent 6637278 commit 82d0a8a

File tree

6 files changed

+394
-288
lines changed

6 files changed

+394
-288
lines changed

PersonalTesting/setup_test.py

Lines changed: 18 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,7 @@ def get_voyage_embedding(text, cache_key=None):
5151
"input_type": "document"
5252
}
5353

54+
print(f"Calling Voyage API for text: {text[:50]}...")
5455
response = requests.post(
5556
"https://api.voyageai.com/v1/embeddings",
5657
headers=headers,
@@ -81,6 +82,8 @@ def get_openai_embedding(text, cache_key=None):
8182
"model": "text-embedding-3-small",
8283
"encoding_format": "float"
8384
}
85+
86+
print(f"Calling OpenAI API for text: {text[:50]}...")
8487
response = requests.post(
8588
"https://api.openai.com/v1/embeddings",
8689
headers=headers,
@@ -94,27 +97,6 @@ def get_openai_embedding(text, cache_key=None):
9497

9598
return embedding
9699

97-
def create_synthetic_embedding(base_embedding, variation=0.05, dims=None):
98-
"""Create a synthetic embedding based on an existing one with some variation"""
99-
if dims is None:
100-
dims = base_embedding.shape[0]
101-
102-
if base_embedding is not None:
103-
# Add random noise to the base embedding
104-
noise = np.random.normal(0, variation, base_embedding.shape)
105-
synthetic = base_embedding + noise
106-
107-
# Normalize the embedding
108-
norm = np.linalg.norm(synthetic)
109-
if norm > 0:
110-
synthetic = synthetic / norm
111-
else:
112-
# Generate a completely random embedding if no base is provided
113-
synthetic = np.random.random(dims).astype(np.float32)
114-
synthetic = synthetic / np.linalg.norm(synthetic)
115-
116-
return synthetic
117-
118100
def file_exists_and_not_empty(filepath):
119101
"""Check if a file exists and is not empty"""
120102
return os.path.exists(filepath) and os.path.getsize(filepath) > 0
@@ -199,9 +181,6 @@ def create_test_files():
199181
"api_design.md": api_doc
200182
}
201183

202-
# Keep track of real embeddings for synthetic generation
203-
real_embeddings = {}
204-
205184
for filename, content in files.items():
206185
print(f"\nProcessing {filename}...")
207186

@@ -212,40 +191,38 @@ def create_test_files():
212191
openai_emb = get_openai_embedding(content, cache_key=f"openai_{filename}")
213192
np.save(openai_path, openai_emb)
214193
print(f"Created {openai_path}")
215-
real_embeddings['openai'] = openai_emb
216194

217195
# Also save as binary
218196
openai_bin_path = f"test_files/{filename}.openai.bin"
219197
save_as_bin(openai_emb, openai_bin_path)
220198
else:
221199
print(f"Using existing OpenAI embedding: {openai_path}")
222-
real_embeddings['openai'] = np.load(openai_path)
223200

224201
# Create bin file if it doesn't exist
225202
openai_bin_path = f"test_files/{filename}.openai.bin"
226203
if not file_exists_and_not_empty(openai_bin_path):
227-
save_as_bin(real_embeddings['openai'], openai_bin_path)
204+
openai_emb = np.load(openai_path)
205+
save_as_bin(openai_emb, openai_bin_path)
228206

229-
# Generate Voyage embedding as a synthetic version of OpenAI to reduce API calls
207+
# Generate a real Voyage embedding (only if one does not already exist)
230208
voyage_path = f"test_files/{filename}.voyage.npy"
231209
if not file_exists_and_not_empty(voyage_path):
232-
print("Creating synthetic Voyage embedding based on OpenAI embedding...")
233-
voyage_emb = create_synthetic_embedding(real_embeddings['openai'])
210+
print("Getting Voyage embedding for full document...")
211+
voyage_emb = get_voyage_embedding(content, cache_key=f"voyage_{filename}")
234212
np.save(voyage_path, voyage_emb)
235213
print(f"Created {voyage_path}")
236-
real_embeddings['voyage'] = voyage_emb
237214

238215
# Also save as binary
239216
voyage_bin_path = f"test_files/{filename}.voyage.bin"
240217
save_as_bin(voyage_emb, voyage_bin_path)
241218
else:
242219
print(f"Using existing Voyage embedding: {voyage_path}")
243-
real_embeddings['voyage'] = np.load(voyage_path)
244220

245221
# Create bin file if it doesn't exist
246222
voyage_bin_path = f"test_files/{filename}.voyage.bin"
247223
if not file_exists_and_not_empty(voyage_bin_path):
248-
save_as_bin(real_embeddings['voyage'], voyage_bin_path)
224+
voyage_emb = np.load(voyage_path)
225+
save_as_bin(voyage_emb, voyage_bin_path)
249226

250227
def create_modified_files():
251228
"""Create modified versions of test files to test rollback"""
@@ -277,11 +254,11 @@ def create_modified_files():
277254
modified_openai_bin_path = "test_files/technical_doc.txt.modified.openai.bin"
278255
save_as_bin(openai_emb, modified_openai_bin_path)
279256

280-
# Create synthetic Voyage embedding
257+
# Create real Voyage embedding
281258
modified_voyage_path = "test_files/technical_doc.txt.modified.voyage.npy"
282-
voyage_emb = create_synthetic_embedding(openai_emb)
259+
voyage_emb = get_voyage_embedding(modified_tech_doc, cache_key="voyage_tech_modified")
283260
np.save(modified_voyage_path, voyage_emb)
284-
print(f"Created synthetic {modified_voyage_path}")
261+
print(f"Created {modified_voyage_path}")
285262

286263
# Also save as binary
287264
modified_voyage_bin_path = "test_files/technical_doc.txt.modified.voyage.bin"
@@ -337,11 +314,11 @@ def create_modified_files():
337314
modified_openai_bin_path = "test_files/api_design.md.modified.openai.bin"
338315
save_as_bin(openai_emb, modified_openai_bin_path)
339316

340-
# Create synthetic Voyage embedding
317+
# Create real Voyage embedding
341318
modified_voyage_path = "test_files/api_design.md.modified.voyage.npy"
342-
voyage_emb = create_synthetic_embedding(openai_emb)
319+
voyage_emb = get_voyage_embedding(modified_api_doc, cache_key="voyage_api_modified")
343320
np.save(modified_voyage_path, voyage_emb)
344-
print(f"Created synthetic {modified_voyage_path}")
321+
print(f"Created {modified_voyage_path}")
345322

346323
# Also save as binary
347324
modified_voyage_bin_path = "test_files/api_design.md.modified.voyage.bin"
@@ -371,6 +348,7 @@ def print_test_commands():
371348
print("\n1. Initialize EmbeddingBridge in your project:")
372349
print(" eb init")
373350
print(" eb model register openai-3-small --dimensions 1536 --description \"OpenAI text-embedding-3-small model\"")
351+
print(" eb model register voyage-2 --dimensions 1024 --description \"Voyage-2 embedding model\"")
374352

375353
print("\n2. Create and set up the test files (this runs your setup script):")
376354
print(" python PersonalTesting/setup_test.py")
@@ -415,7 +393,7 @@ def print_test_commands():
415393
print(" # Compare using specific model")
416394
print(" eb diff --model openai-3-small <short hash> <short hash>")
417395
print(" # Compare using different models for each file")
418-
print(" eb diff --models openai-3-small,voyage <short hash> <short hash>")
396+
print(" eb diff --models openai-3-small,voyage-2 <short hash> <short hash>")
419397

420398
print("\n8. Rollback to previous versions:")
421399
print(" # Rollback by file path (goes to previous version)")

PersonalTesting/test_files/api_design.md

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -2,31 +2,31 @@
22

33

44
## Overview
5-
Dolore dolor neque quisquam etincidunt neque dolore. Ipsum porro dolor dolore quisquam quisquam sit etincidunt. Amet adipisci numquam sed non sed dolore. Porro porro ut porro labore eius non. Amet quaerat quisquam numquam ipsum est voluptatem. Neque ut numquam voluptatem sit tempora ipsum etincidunt.
5+
Adipisci ut magnam non modi dolor. Sit quiquia tempora magnam dolore sed ipsum. Dolorem dolore porro magnam adipisci. Ipsum voluptatem dolorem velit. Dolor etincidunt aliquam consectetur. Ut neque non adipisci porro quaerat modi.
66

77
## Authentication
8-
Magnam porro adipisci eius. Aliquam non sit est ipsum est amet voluptatem. Labore ipsum consectetur consectetur dolore neque est. Sit sit sed aliquam modi sed velit. Neque ipsum numquam voluptatem sed etincidunt numquam. Quaerat numquam eius dolor neque velit modi. Quaerat voluptatem ipsum tempora porro amet. Modi quaerat ut etincidunt.
8+
Adipisci ut amet adipisci porro. Sit dolore aliquam modi ipsum dolorem dolorem. Tempora etincidunt quaerat adipisci magnam dolorem. Consectetur quisquam consectetur velit quisquam non. Quiquia magnam ipsum sit dolore numquam adipisci. Non quaerat dolorem etincidunt etincidunt porro est quisquam. Consectetur aliquam etincidunt est quaerat aliquam ut dolor. Ut labore ut ipsum dolore.
99

10-
Sit aliquam est voluptatem modi voluptatem voluptatem magnam.
10+
Consectetur consectetur ipsum est.
1111

1212
## Endpoints
13-
- POST /embeddings: Quaerat magnam numquam neque est est.
14-
- GET /models: Tempora ut magnam modi tempora labore velit.
15-
- PUT /configurations: Dolorem modi neque dolorem neque velit non porro.
16-
- DELETE /models/{id}: Consectetur est quiquia amet.
13+
- POST /embeddings: Quaerat numquam non dolorem eius.
14+
- GET /models: Aliquam dolor porro eius.
15+
- PUT /configurations: Quiquia dolore est quiquia quaerat ipsum sit.
16+
- DELETE /models/{id}: Eius etincidunt labore quaerat est.
1717

1818
## Error Handling
19-
Porro etincidunt aliquam quaerat ipsum sit. Neque ipsum dolor eius numquam. Non non quiquia amet amet eius neque. Voluptatem dolore dolore sed neque ut non. Adipisci etincidunt sed est quisquam. Tempora dolore est aliquam non. Adipisci etincidunt non dolor tempora velit dolorem voluptatem. Etincidunt labore numquam etincidunt modi labore porro ipsum.
19+
Consectetur quiquia ut quisquam neque eius quaerat. Dolor non consectetur modi non. Voluptatem ut est ipsum. Dolore eius sit consectetur aliquam. Tempora neque non voluptatem. Dolor velit numquam neque quaerat.
2020

2121
## Rate Limiting
22-
Consectetur dolor numquam sed. Neque neque consectetur aliquam quisquam. Dolore labore numquam ut quisquam. Dolore adipisci adipisci quisquam porro labore velit tempora. Sed ut adipisci modi. Eius dolorem porro quisquam neque voluptatem voluptatem. Velit dolore quisquam velit.
22+
Amet ipsum amet ipsum dolore etincidunt dolore. Est sed amet sit adipisci consectetur. Consectetur modi amet quisquam ipsum voluptatem ipsum voluptatem. Labore quaerat voluptatem ut. Voluptatem quisquam porro dolore dolor magnam velit amet.
2323

2424
## New Endpoints
25-
- PATCH /models/{id}: Quaerat consectetur est adipisci neque dolore.
26-
- POST /validate: Ut sit aliquam dolorem amet.
25+
- PATCH /models/{id}: Etincidunt neque quiquia sed adipisci est numquam neque.
26+
- POST /validate: Neque eius neque dolore sed.
2727

2828
## Best Practices
29-
- Magnam dolorem sed dolor.
30-
- Velit consectetur modi sed.
31-
- Consectetur sit sit est.
32-
- Etincidunt quaerat velit quiquia labore.
29+
- Sit porro etincidunt labore etincidunt numquam dolore.
30+
- Numquam adipisci quiquia sit non etincidunt.
31+
- Quiquia quiquia sed aliquam quaerat velit eius dolore.
32+
- Dolorem eius sit labore est.

README.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,8 @@ A command-line tool for managing and versioning embedding vectors. Think of it a
1616

1717
```bash
1818
make clean
19-
make DEBUG=0 all
20-
make DEBUG=0 install
19+
make all
20+
make install
2121
```
2222

2323
## Quick Start

0 commit comments

Comments
 (0)