-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmergebins.cpp
More file actions
67 lines (47 loc) · 1.68 KB
/
mergebins.cpp
File metadata and controls
67 lines (47 loc) · 1.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
using namespace std;
// Number of shard file pairs merged by main(): chunk_<i>.bin and
// chunk_offsets_<i>.txt for every i in [0, NUM_SHARDS).
constexpr int NUM_SHARDS{32};

// Path of the merged binary index produced by main().
const std::string FINAL_INDEX{"index.bin"};

// Path of the merged, whitespace-separated "<term> <offset>" table produced by main().
const std::string FINAL_OFFSET{"offset.txt"};

// Written verbatim (sizeof(int) raw bytes, host byte order) as the very first
// field of FINAL_INDEX. Presumably the number of documents in the indexed
// corpus -- TODO confirm against the code that produced the shards.
constexpr int TOTAL_DOCS{7084107};
int main(){
ofstream final_bin(FINAL_INDEX, ios::binary);
ofstream final_offset(FINAL_OFFSET);
if(!final_bin.is_open() || !final_offset.is_open()){
cerr << "Final bin or Final Offset not opening";
return 1;
}
final_bin.write(reinterpret_cast<const char*>(&TOTAL_DOCS), sizeof(TOTAL_DOCS));
for(int i=0; i<NUM_SHARDS; i++){
string chunk_bin_name = "chunk_" + to_string(i) + ".bin";
string chunk_offset_name = "chunk_offsets_" + to_string(i) + ".txt";
cout << "----- Merging File " << i << " -----" <<endl;
long long global_base_offset = final_bin.tellp();
ifstream chunk_offset(chunk_offset_name);
string term;
long long localpos;
if(!chunk_offset.is_open()){
cerr << "Chunk offset " << i << " did not open";
return 1;
}
while(chunk_offset >> term >> localpos){
long long finalpos = global_base_offset + localpos;
final_offset << term << " " << finalpos << '\n';
}
chunk_offset.close();
ifstream chunkbin(chunk_bin_name, ios::binary);
if(!chunkbin.is_open()){
cerr << "Chunk Bin" << i << " did not open";
return 1;
}
final_bin << chunkbin.rdbuf();
chunkbin.close();
cout << "----- Chunk " << i << " finished combining -----" << endl;
}
final_bin.close();
final_offset.close();
return 0;
}