Skip to content

Commit 957631b

Browse files
gh-54: Improve MAP performance.
1 parent a3c4352 commit 957631b

2 files changed

Lines changed: 167 additions & 49 deletions

File tree

src/value.c

Lines changed: 164 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <stdlib.h>
44
#include <string.h>
55
#include <assert.h>
6+
#include <stdint.h>
67

78
#ifdef _MSC_VER
89
#define strdup _strdup
@@ -277,51 +278,169 @@ Value value_tns_slice(Value v, const int64_t* starts, const int64_t* ends, size_
277278
return out;
278279
}
279280

280-
// Map implementation
281+
// Map implementation (ordered hash table)
282+
// Scrambles a 64-bit value so that nearby inputs land in unrelated buckets.
//
// Runs two rounds of "xor with the value shifted right by 33, then multiply
// by an odd 64-bit constant", followed by one final xor-shift. The shifts
// and multipliers are the same ones used by MurmurHash3's 64-bit finalizer.
//
// Returns the mixed value. An input of 0 yields 0.
static uint64_t map_hash_mix(uint64_t x) {
    static const uint64_t round_multiplier[2] = {
        0xff51afd7ed558ccdULL,
        0xc4ceb9fe1a85ec53ULL
    };

    for (int round = 0; round < 2; round++) {
        x = (x ^ (x >> 33)) * round_multiplier[round];
    }
    return x ^ (x >> 33);
}
290+
291+
// Hashes a NUL-terminated string for use as a map key.
//
// Each byte is folded in FNV-1a fashion (xor the byte into the accumulator,
// then multiply by the 64-bit FNV prime) and the result is passed through
// map_hash_mix(). A NULL pointer is hashed as the empty string.
//
// NOTE(review): the starting value below is not the standard 64-bit FNV
// offset basis (14695981039346656037). That is harmless for an in-memory
// table, but confirm nothing expects standard FNV-1a output before changing it.
static uint64_t map_hash_string(const char* s) {
    const uint64_t seed = 1469598103934665603ULL;
    const uint64_t fnv_prime = 1099511628211ULL;

    if (!s) s = "";

    uint64_t acc = seed;
    for (const unsigned char* cur = (const unsigned char*)s; *cur != 0; cur++) {
        acc = (acc ^ (uint64_t)*cur) * fnv_prime;
    }
    return map_hash_mix(acc);
}
300+
301+
// Computes the bucket hash for a map key.
//
//   VAL_STR  -> map_hash_string() of the string.
//   VAL_INT  -> the integer xor a fixed constant, then map_hash_mix().
//   VAL_FLT  -> the double's bit pattern xor a different constant, then
//               map_hash_mix().
//   other    -> 0.
static uint64_t map_hash_key(Value key) {
    if (key.type == VAL_STR) {
        return map_hash_string(key.as.s);
    }

    if (key.type == VAL_INT) {
        uint64_t int_bits = (uint64_t)key.as.i;
        return map_hash_mix(int_bits ^ 0x9e3779b97f4a7c15ULL);
    }

    if (key.type == VAL_FLT) {
        double normalized = key.as.f;
        uint64_t flt_bits = 0;
        // -0.0 == 0.0 is true, so this rewrites negative zero to +0.0.
        // Both zeros compare equal as keys and must therefore hash alike.
        if (normalized == 0.0) normalized = 0.0;
        memcpy(&flt_bits, &normalized, sizeof(uint64_t));
        return map_hash_mix(flt_bits ^ 0x243f6a8885a308d3ULL);
    }

    return 0;
}
317+
318+
// Reports whether two map keys are the same key (1) or not (0).
//
// Keys of different types never match. Integers and floats are compared
// with ==. Strings are compared with strcmp(); if either string pointer is
// NULL the keys match only when both pointers are identical (both NULL).
// Any other key type never matches.
static int map_key_equals(Value a, Value b) {
    if (a.type != b.type) return 0;

    if (a.type == VAL_STR) {
        const char* lhs = a.as.s;
        const char* rhs = b.as.s;
        if (lhs && rhs) return strcmp(lhs, rhs) == 0;
        return lhs == rhs;
    }
    if (a.type == VAL_FLT) return a.as.f == b.as.f;
    if (a.type == VAL_INT) return a.as.i == b.as.i;

    return 0;
}
328+
329+
// Picks a bucket-array size for a table holding `entries` items.
//
// The result is always a power of two and at least 16. For 8 or more
// entries it is the smallest such power of two that is >= 2 * entries,
// i.e. a load factor of at most one half.
//
// `entries * 2` is saturated instead of being allowed to wrap: with a
// wrapped product a huge entry count would have produced a tiny table.
// For such counts the result is the largest power of two size_t can hold.
static size_t map_recommended_bucket_count(size_t entries) {
    size_t needed;
    if (entries < 8) {
        needed = 16;
    } else if (entries > SIZE_MAX / 2) {
        needed = SIZE_MAX;  // doubling would overflow size_t
    } else {
        needed = entries * 2;
    }

    size_t bc = 16;
    // The second condition stops the shift before it can overflow.
    while (bc < needed && bc <= (SIZE_MAX / 2)) bc <<= 1;
    return bc;
}
335+
336+
// Rebuilds the hash index of `m` from scratch with `bucket_count` buckets.
//
// m            - map to re-index; NULL is ignored.
// bucket_count - number of buckets to allocate; 0 means "use
//                map_recommended_bucket_count(m->count)".
//
// Every entry in m->items[0 .. count) is linked into the bucket chosen by
// its key hash. Entries are pushed at the head of their chain, so within a
// bucket the highest item index comes first. The old bucket array is freed
// and replaced. The items array itself is not moved or resized.
//
// Prints "Out of memory" and exits if the bucket array cannot be allocated,
// including when its byte size would not fit in size_t.
static void map_rehash(Map* m, size_t bucket_count) {
    if (!m) return;
    if (bucket_count == 0) bucket_count = map_recommended_bucket_count(m->count);

    // Guard the multiplication below: a wrapped size would make malloc
    // return a buffer smaller than the loops that follow assume.
    if (bucket_count > SIZE_MAX / sizeof(int64_t)) {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }

    int64_t* new_buckets = malloc(sizeof *new_buckets * bucket_count);
    if (!new_buckets) { fprintf(stderr, "Out of memory\n"); exit(1); }
    for (size_t i = 0; i < bucket_count; i++) new_buckets[i] = -1;  // -1 == empty bucket

    // Each entry's next_hash is overwritten here, so no separate reset pass
    // is needed: the first entry to reach a bucket inherits the -1 sentinel.
    for (size_t i = 0; i < m->count; i++) {
        size_t b = (size_t)(map_hash_key(m->items[i].key) % bucket_count);
        m->items[i].next_hash = new_buckets[b];
        new_buckets[b] = (int64_t)i;
    }

    free(m->buckets);
    m->buckets = new_buckets;
    m->bucket_count = bucket_count;
}
355+
356+
// Grows m->items so that it can hold at least `need` entries.
//
// Does nothing when the current capacity already suffices. Otherwise the
// capacity becomes 8 for an empty map, or the current capacity doubled
// until it reaches `need`.
//
// Prints "Out of memory" and exits if the array cannot be grown. That
// includes the cases where doubling cannot reach `need` without overflowing
// size_t, or where the byte size of the array would overflow. Previously
// those cases fell through to an undersized allocation.
//
// Growing may move the array, so pointers into m->items obtained before
// this call must not be used afterwards.
static void map_ensure_entry_capacity(Map* m, size_t need) {
    if (need <= m->capacity) return;

    // need > capacity here, so a non-empty map always doubles at least once.
    size_t newc = m->capacity == 0 ? 8 : m->capacity;
    while (newc < need && newc <= (SIZE_MAX / 2)) newc *= 2;

    if (newc < need || newc > SIZE_MAX / sizeof(MapEntry)) {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }

    // Keep the old pointer until realloc is known to have succeeded.
    MapEntry* grown = realloc(m->items, sizeof(MapEntry) * newc);
    if (!grown) { fprintf(stderr, "Out of memory\n"); exit(1); }
    m->items = grown;
    m->capacity = newc;
}
364+
365+
// Decides whether the hash index must be rebuilt after an insertion.
//
// m                 - map whose entry count is already `post_insert_count`.
// post_insert_count - number of entries including the one just appended.
//
// If the map has no bucket array yet, one is built at the recommended size.
// Otherwise, when the entries exceed three quarters of the bucket count,
// the index is rebuilt with twice as many buckets (or the same number if
// doubling would overflow size_t).
//
// Returns 1 if map_rehash() ran (every entry is then linked), 0 otherwise.
static int map_maybe_rehash_for_insert(Map* m, size_t post_insert_count) {
    int has_index = (m->bucket_count != 0) && (m->buckets != NULL);
    if (!has_index) {
        map_rehash(m, map_recommended_bucket_count(post_insert_count));
        return 1;
    }

    size_t scaled_entries = post_insert_count * 4;
    size_t scaled_limit = m->bucket_count * 3;
    if (scaled_entries <= scaled_limit) return 0;

    size_t target = m->bucket_count;
    if (target <= (SIZE_MAX / 2)) target *= 2;
    map_rehash(m, target);
    return 1;
}
381+
382+
// Looks up `key` and returns its position in m->items, or -1 if absent.
//
// A NULL or empty map yields -1. When a bucket array exists, only the chain
// of the key's bucket is walked. Without one, the items are scanned
// linearly from the front.
static int map_find_index(Map* m, Value key) {
    if (!m || m->count == 0) return -1;

    if (m->buckets && m->bucket_count != 0) {
        size_t slot = (size_t)(map_hash_key(key) % m->bucket_count);
        // Chains are linked through next_hash; a negative index ends the chain.
        for (int64_t cur = m->buckets[slot]; cur >= 0; cur = m->items[(size_t)cur].next_hash) {
            if (map_key_equals(m->items[(size_t)cur].key, key)) return (int)cur;
        }
        return -1;
    }

    // No hash index available: fall back to a front-to-back scan.
    size_t pos = 0;
    while (pos < m->count) {
        if (map_key_equals(m->items[pos].key, key)) return (int)pos;
        pos++;
    }
    return -1;
}
400+
401+
// Appends a new entry at the end of m->items and indexes it.
//
// key_copy, val_copy - stored as given; no further copy is made here.
//
// The caller is expected to have checked that the key is not already
// present, because this function never looks for duplicates.
//
// Returns the index of the new entry in m->items.
static int64_t map_append_entry(Map* m, Value key_copy, Value val_copy) {
    const size_t slot = m->count;
    map_ensure_entry_capacity(m, slot + 1);

    // Take the address only after the capacity check, which may move items.
    MapEntry* entry = &m->items[slot];
    entry->key = key_copy;
    entry->value = val_copy;
    entry->next_hash = -1;
    m->count = slot + 1;

    // A rehash walks all m->count entries, so it already links the new one.
    if (map_maybe_rehash_for_insert(m, m->count)) return (int64_t)slot;
    if (!m->buckets || m->bucket_count == 0) return (int64_t)slot;

    // No rehash happened: push the new entry at the head of its bucket chain.
    size_t bucket = (size_t)(map_hash_key(entry->key) % m->bucket_count);
    entry->next_hash = m->buckets[bucket];
    m->buckets[bucket] = (int64_t)slot;
    return (int64_t)slot;
}
418+
281419
Value value_map_new(void) {
282420
Value v; v.type = VAL_MAP;
283421
Map* m = malloc(sizeof(Map));
284422
if (!m) { fprintf(stderr, "Out of memory\n"); exit(1); }
285423
m->items = NULL;
286424
m->count = 0;
287425
m->capacity = 0;
426+
m->buckets = NULL;
427+
m->bucket_count = 0;
288428
m->refcount = 1;
289429
mtx_init(&m->lock, 0);
290430
v.as.map = m;
291431
return v;
292432
}
293433

294-
static int map_find_index(Map* m, Value key) {
295-
for (size_t i = 0; i < m->count; i++) {
296-
MapEntry* e = &m->items[i];
297-
if (e->key.type == key.type) {
298-
if (key.type == VAL_INT && e->key.as.i == key.as.i) return (int)i;
299-
if (key.type == VAL_STR && e->key.as.s && key.as.s && strcmp(e->key.as.s, key.as.s) == 0) return (int)i;
300-
if (key.type == VAL_FLT && e->key.as.f == key.as.f) return (int)i;
301-
}
302-
}
303-
return -1;
304-
}
305-
306434
// Stores `val` under `key` in the map held by `mapval`.
//
// Does nothing if `mapval` is NULL or does not hold a VAL_MAP.
// If the key already exists, its old value is freed and replaced by a copy
// of `val`. Otherwise copies of `key` and `val` are appended as a new entry.
//
// In this diff view the lines whose gutter ends in '-' are the pre-commit
// inline grow-and-append code. The '+' line replaces them with a single
// call to map_append_entry().
//
// NOTE(review): on replacement the old value is freed before `val` is
// copied. If `val` can share storage with the stored value, that order may
// matter. Confirm against value_free()/value_copy().
void value_map_set(Value* mapval, Value key, Value val) {
307435
if (!mapval || mapval->type != VAL_MAP) return;
308436
Map* m = mapval->as.map;
309437
int idx = map_find_index(m, key);
310438
if (idx >= 0) {
311-
// replace
312439
value_free(m->items[idx].value);
313440
m->items[idx].value = value_copy(val);
314441
return;
315442
}
316-
if (m->count + 1 > m->capacity) {
317-
size_t newc = m->capacity == 0 ? 8 : m->capacity * 2;
318-
m->items = realloc(m->items, sizeof(MapEntry) * newc);
319-
if (!m->items) { fprintf(stderr, "Out of memory\n"); exit(1); }
320-
m->capacity = newc;
321-
}
322-
m->items[m->count].key = value_copy(key);
323-
m->items[m->count].value = value_copy(val);
324-
m->count++;
443+
map_append_entry(m, value_copy(key), value_copy(val));
325444
}
326445

327446
Value value_map_get(Value mapval, Value key, int* found) {
@@ -339,11 +458,16 @@ void value_map_delete(Value* mapval, Value key) {
339458
Map* m = mapval->as.map;
340459
int idx = map_find_index(m, key);
341460
if (idx < 0) return;
461+
342462
value_free(m->items[idx].key);
343463
value_free(m->items[idx].value);
344-
// compact
345-
for (size_t i = (size_t)idx; i + 1 < m->count; i++) m->items[i] = m->items[i+1];
346-
m->count--;
464+
465+
for (size_t i = (size_t)idx; i + 1 < m->count; i++) m->items[i] = m->items[i + 1];
466+
if (m->count > 0) m->count--;
467+
468+
if (m->buckets && m->bucket_count > 0) {
469+
map_rehash(m, map_recommended_bucket_count(m->count));
470+
}
347471
}
348472

349473
void value_map_set_self(Value* mapval, Value key) {
@@ -352,39 +476,21 @@ void value_map_set_self(Value* mapval, Value key) {
352476
int idx = map_find_index(m, key);
353477
if (idx >= 0) {
354478
value_free(m->items[idx].value);
355-
m->items[idx].value = value_alias(*mapval); // alias points to the same Map
479+
m->items[idx].value = value_alias(*mapval);
356480
return;
357481
}
358-
if (m->count + 1 > m->capacity) {
359-
size_t newc = m->capacity == 0 ? 8 : m->capacity * 2;
360-
m->items = realloc(m->items, sizeof(MapEntry) * newc);
361-
if (!m->items) { fprintf(stderr, "Out of memory\n"); exit(1); }
362-
m->capacity = newc;
363-
}
364-
m->items[m->count].key = value_copy(key);
365-
m->items[m->count].value = value_alias(*mapval);
366-
m->count++;
482+
map_append_entry(m, value_copy(key), value_alias(*mapval));
367483
}
368484

369485
// Returns a pointer to the value slot stored under `key`.
//
// Returns NULL if `mapval` is NULL or does not hold a VAL_MAP, or if the
// key is absent and `create_if_missing` is false. If the key is absent and
// `create_if_missing` is true, a new entry holding a copy of `key` and
// value_null() is appended, and a pointer to its value slot is returned.
//
// The pointer refers to an element of m->items. A later insertion can
// reallocate that array and a deletion shifts its elements, so the pointer
// should not be kept across other modifications of the same map.
//
// In this diff view the lines whose gutter ends in '-' are the pre-commit
// code. The '+' lines are its replacement.
Value* value_map_get_ptr(Value* mapval, Value key, bool create_if_missing) {
370486
if (!mapval || mapval->type != VAL_MAP) return NULL;
371487
Map* m = mapval->as.map;
372488
int idx = map_find_index(m, key);
373-
if (idx >= 0) {
374-
return &m->items[idx].value;
375-
}
489+
if (idx >= 0) return &m->items[idx].value;
376490
if (!create_if_missing) return NULL;
377491

378-
if (m->count + 1 > m->capacity) {
379-
size_t newc = m->capacity == 0 ? 8 : m->capacity * 2;
380-
m->items = realloc(m->items, sizeof(MapEntry) * newc);
381-
if (!m->items) { fprintf(stderr, "Out of memory\n"); exit(1); }
382-
m->capacity = newc;
383-
}
384-
m->items[m->count].key = value_copy(key);
385-
m->items[m->count].value = value_null();
386-
m->count++;
387-
return &m->items[m->count - 1].value;
492+
int64_t new_idx = map_append_entry(m, value_copy(key), value_null());
493+
return &m->items[(size_t)new_idx].value;
388494
}
389495

390496
Value* value_tns_get_ptr(Value v, const size_t* idxs, size_t nidxs) {
@@ -441,7 +547,11 @@ Value value_copy(Value v) {
441547
extern Value value_alias(Value v);
442548
m2->items[i].key = value_alias(m->items[i].key);
443549
m2->items[i].value = value_alias(m->items[i].value);
550+
m2->items[i].next_hash = -1;
444551
}
552+
m2->buckets = NULL;
553+
m2->bucket_count = 0;
554+
if (m2->count > 0) map_rehash(m2, map_recommended_bucket_count(m2->count));
445555
m2->refcount = 1;
446556
mtx_init(&m2->lock, 0);
447557
out.as.map = m2;
@@ -516,7 +626,11 @@ Value value_deep_copy(Value v) {
516626
for (size_t i = 0; i < m->count; i++) {
517627
m2->items[i].key = value_deep_copy(m->items[i].key);
518628
m2->items[i].value = value_deep_copy(m->items[i].value);
629+
m2->items[i].next_hash = -1;
519630
}
631+
m2->buckets = NULL;
632+
m2->bucket_count = 0;
633+
if (m2->count > 0) map_rehash(m2, map_recommended_bucket_count(m2->count));
520634
m2->refcount = 1;
521635
mtx_init(&m2->lock, 0);
522636
out.as.map = m2;
@@ -564,6 +678,7 @@ void value_free(Value v) {
564678
}
565679
free(m->items);
566680
}
681+
if (m->buckets) free(m->buckets);
567682
mtx_destroy(&m->lock);
568683
free(m);
569684
}

src/value.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,15 @@ typedef struct Value {
6262
// One key/value pair stored in a Map's `items` array.
typedef struct MapEntry {
6363
Value key;
6464
Value value;
65+
int64_t next_hash; // index in `items` of the next entry in the same hash bucket; -1 ends the chain
6566
} MapEntry;
6667

6768
// Map storage: entries are kept in the `items` array, and `buckets` is a
// hash index over that array used for key lookup.
typedef struct Map {
6869
MapEntry* items; // entry array; NULL while the map has never held an entry
6970
size_t count; // number of entries currently in use
7071
size_t capacity; // number of entries `items` has room for
72+
int64_t* buckets; // per-bucket index of the first entry in its chain, -1 if empty; NULL until first built
73+
size_t bucket_count; // length of `buckets`; 0 while `buckets` is NULL
7174
int refcount; // set to 1 when the map is created
7275
mtx_t lock; // initialized with mtx_init() when the map is created
7376
} Map;

0 commit comments

Comments
 (0)