Skip to content

Commit e86ff4a

Browse files
authored
Merge pull request #49 from tidesdb/0-9-6
addition of db stats, purge, and sync wal methods with tests; bumped version to 0.9.6
2 parents e4c3669 + ddb0af8 commit e86ff4a

4 files changed

Lines changed: 329 additions & 2 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "tidesdb"
7-
version = "0.9.5"
7+
version = "0.9.6"
88
description = "Official Python bindings for TidesDB - A high-performance embedded key-value storage engine"
99
readme = "README.md"
1010
requires-python = ">=3.10"

src/tidesdb/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
ColumnFamilyConfig,
1717
Stats,
1818
CacheStats,
19+
DbStats,
1920
CompressionAlgorithm,
2021
SyncMode,
2122
LogLevel,
@@ -30,7 +31,7 @@
3031
COMMIT_HOOK_FUNC,
3132
)
3233

33-
__version__ = "0.9.5"
34+
__version__ = "0.9.6"
3435
__all__ = [
3536
"TidesDB",
3637
"Transaction",
@@ -40,6 +41,7 @@
4041
"ColumnFamilyConfig",
4142
"Stats",
4243
"CacheStats",
44+
"DbStats",
4345
"CompressionAlgorithm",
4446
"SyncMode",
4547
"LogLevel",

src/tidesdb/tidesdb.py

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,28 @@ class _CCacheStats(Structure):
275275
]
276276

277277

278+
class _CDbStats(Structure):
279+
"""C structure for tidesdb_db_stats_t."""
280+
281+
_fields_ = [
282+
("num_column_families", c_int),
283+
("total_memory", c_uint64),
284+
("available_memory", c_uint64),
285+
("resolved_memory_limit", c_size_t),
286+
("memory_pressure_level", c_int),
287+
("flush_pending_count", c_int),
288+
("total_memtable_bytes", c_int64),
289+
("total_immutable_count", c_int),
290+
("total_sstable_count", c_int),
291+
("total_data_size_bytes", c_uint64),
292+
("num_open_sstables", c_int),
293+
("global_seq", c_uint64),
294+
("txn_memory_bytes", c_int64),
295+
("compaction_queue_size", c_size_t),
296+
("flush_queue_size", c_size_t),
297+
]
298+
299+
278300
_lib.tidesdb_default_column_family_config.argtypes = []
279301
_lib.tidesdb_default_column_family_config.restype = _CColumnFamilyConfig
280302

@@ -451,6 +473,18 @@ class _CCacheStats(Structure):
451473
_lib.tidesdb_delete_column_family.argtypes = [c_void_p, c_void_p]
452474
_lib.tidesdb_delete_column_family.restype = c_int
453475

476+
# ctypes signatures for the maintenance APIs: purge (per-CF and database-wide),
# WAL sync, and the aggregate stats query. All return a TDB status int.
_lib.tidesdb_purge_cf.argtypes = [c_void_p]
_lib.tidesdb_purge_cf.restype = c_int

_lib.tidesdb_purge.argtypes = [c_void_p]
_lib.tidesdb_purge.restype = c_int

_lib.tidesdb_sync_wal.argtypes = [c_void_p]
_lib.tidesdb_sync_wal.restype = c_int

_lib.tidesdb_get_db_stats.argtypes = [c_void_p, POINTER(_CDbStats)]
_lib.tidesdb_get_db_stats.restype = c_int
487+
454488

455489
@dataclass
456490
class Config:
@@ -565,6 +599,27 @@ class CacheStats:
565599
num_partitions: int
566600

567601

602+
@dataclass
class DbStats:
    """Database-level aggregate statistics.

    Pure-Python mirror of the C ``tidesdb_db_stats_t`` struct; every
    field is a plain ``int`` copied out of ``_CDbStats`` by
    ``TidesDB.get_db_stats``. Field order matches the C declaration.
    """

    num_column_families: int  # number of column families in the database
    total_memory: int  # total system memory, bytes
    available_memory: int  # currently available system memory, bytes
    resolved_memory_limit: int  # effective memory limit after resolution
    memory_pressure_level: int  # engine-reported pressure level
    flush_pending_count: int  # flushes currently pending
    total_memtable_bytes: int  # bytes held in active memtables
    total_immutable_count: int  # immutable memtables awaiting flush
    total_sstable_count: int  # SSTables across all column families
    total_data_size_bytes: int  # on-disk data size, bytes
    num_open_sstables: int  # SSTables currently held open
    global_seq: int  # global sequence number
    txn_memory_bytes: int  # memory held by active transactions
    compaction_queue_size: int  # entries queued for compaction
    flush_queue_size: int  # entries queued for flush
621+
622+
568623
@dataclass
569624
class CommitOp:
570625
"""A single operation from a committed transaction batch."""
@@ -816,6 +871,48 @@ def is_compacting(self) -> bool:
816871
"""Check if a compaction operation is in progress for this column family."""
817872
return bool(_lib.tidesdb_is_compacting(self._cf))
818873

874+
def purge(self) -> None:
    """Synchronously flush and aggressively compact this column family.

    Unlike the non-blocking ``flush_memtable()`` and ``compact()``, this
    call blocks until all flush and compaction I/O has finished.

    The engine performs these steps:
      1. wait for any in-progress flush to complete,
      2. force-flush the active memtable (even if below threshold),
      3. wait for that flush I/O to fully complete,
      4. wait for any in-progress compaction to complete,
      5. run a synchronous compaction inline (bypassing the queue),
      6. drain any queued compaction work.

    Typical uses: before a backup or checkpoint (all data on disk and
    compacted), after bulk deletes (reclaim tombstone space now), during
    a maintenance window, or pre-shutdown to finish pending work.

    Raises:
        TidesDBError: if the underlying C call reports a failure.
    """
    rc = _lib.tidesdb_purge_cf(self._cf)
    if rc != TDB_SUCCESS:
        raise TidesDBError.from_code(rc, "failed to purge column family")
898+
899+
def sync_wal(self) -> None:
    """Force an immediate fsync of this column family's active WAL.

    Intended for explicit durability control when running with SYNC_NONE
    or SYNC_INTERVAL: call it at application-chosen points — before a
    checkpoint, during graceful shutdown, or right after high-value
    writes — to guarantee buffered WAL data has reached disk.

    Raises:
        TidesDBError: if the underlying C call reports a failure.
    """
    rc = _lib.tidesdb_sync_wal(self._cf)
    if rc != TDB_SUCCESS:
        raise TidesDBError.from_code(rc, "failed to sync WAL")
915+
819916
def update_runtime_config(self, config: ColumnFamilyConfig, persist_to_disk: bool = True) -> None:
820917
"""
821918
Update runtime-safe configuration settings for this column family.
@@ -1656,6 +1753,62 @@ def delete_column_family(self, cf: ColumnFamily) -> None:
16561753
if result != TDB_SUCCESS:
16571754
raise TidesDBError.from_code(result, "failed to delete column family")
16581755

1756+
def purge(self) -> None:
    """Synchronously flush and compact every column family, then drain
    the global flush and compaction queues.

    Unlike the non-blocking ``flush_memtable()`` and ``compact()``, this
    call blocks until all work is complete across all column families:
      1. purge is run on each column family,
      2. the global flush queue is drained,
      3. the global compaction queue is drained.

    Raises:
        TidesDBError: if the database is closed or purge fails for any
            column family.
    """
    if self._closed:
        raise TidesDBError("Database is closed")

    rc = _lib.tidesdb_purge(self._db)
    if rc != TDB_SUCCESS:
        raise TidesDBError.from_code(rc, "failed to purge database")
1778+
1779+
def get_db_stats(self) -> DbStats:
    """Get aggregate statistics across the entire database instance.

    Returns:
        DbStats: a snapshot of database-level counters copied out of the
        C ``tidesdb_db_stats_t`` structure.

    Raises:
        TidesDBError: if the database is closed or the stats call fails.
    """
    if self._closed:
        raise TidesDBError("Database is closed")

    c_stats = _CDbStats()
    result = _lib.tidesdb_get_db_stats(self._db, ctypes.byref(c_stats))
    if result != TDB_SUCCESS:
        raise TidesDBError.from_code(result, "failed to get database stats")

    # _CDbStats._fields_ and the DbStats dataclass declare identical field
    # names, so copy the struct generically instead of listing all fifteen
    # fields by hand — a new field added to both mirrors is picked up here
    # automatically.
    return DbStats(**{name: getattr(c_stats, name) for name, _ in _CDbStats._fields_})
1811+
16591812
def __enter__(self) -> TidesDB:
16601813
return self
16611814

tests/test_tidesdb.py

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -967,5 +967,177 @@ def test_config_with_max_memory_usage(self, temp_db_path):
967967
db.close()
968968

969969

970+
class TestPurgeCf:
    """Tests for column family purge operations."""

    def test_purge_cf_basic(self, db, cf):
        """Purging a populated column family keeps its data readable."""
        with db.begin_txn() as txn:
            for idx in range(50):
                txn.put(cf, f"purge_key_{idx}".encode(), f"purge_val_{idx}".encode())
            txn.commit()

        cf.purge()

        # Every key written before the purge must still be readable.
        with db.begin_txn() as txn:
            for idx in range(50):
                assert txn.get(cf, f"purge_key_{idx}".encode()) == f"purge_val_{idx}".encode()

    def test_purge_cf_empty(self, db, cf):
        """Purging a column family with no data succeeds as a no-op."""
        cf.purge()

    def test_purge_cf_after_deletes(self, db, cf):
        """Purge after bulk deletes compacts tombstones away."""
        with db.begin_txn() as txn:
            for idx in range(20):
                txn.put(cf, f"del_key_{idx}".encode(), f"del_val_{idx}".encode())
            txn.commit()

        with db.begin_txn() as txn:
            for idx in range(20):
                txn.delete(cf, f"del_key_{idx}".encode())
            txn.commit()

        cf.purge()

        # Deleted keys must stay gone after the synchronous compaction.
        with db.begin_txn() as txn:
            for idx in range(20):
                with pytest.raises(tidesdb.TidesDBError):
                    txn.get(cf, f"del_key_{idx}".encode())
1011+
1012+
1013+
class TestPurgeDb:
    """Tests for database-level purge operations."""

    def test_purge_db_basic(self, db):
        """Database-wide purge preserves data across column families."""
        db.create_column_family("purge_cf1")
        db.create_column_family("purge_cf2")
        cf1 = db.get_column_family("purge_cf1")
        cf2 = db.get_column_family("purge_cf2")

        with db.begin_txn() as txn:
            txn.put(cf1, b"k1", b"v1")
            txn.put(cf2, b"k2", b"v2")
            txn.commit()

        db.purge()

        # Both column families must still serve their data after the purge.
        with db.begin_txn() as txn:
            assert txn.get(cf1, b"k1") == b"v1"
            assert txn.get(cf2, b"k2") == b"v2"

        db.drop_column_family("purge_cf1")
        db.drop_column_family("purge_cf2")

    def test_purge_db_empty(self, db):
        """Purging a database with no column families succeeds."""
        db.purge()
1041+
1042+
1043+
class TestSyncWal:
    """Tests for WAL sync operations."""

    def test_sync_wal_basic(self, db, cf):
        """Syncing the WAL after a write leaves the data readable."""
        with db.begin_txn() as txn:
            txn.put(cf, b"sync_key", b"sync_val")
            txn.commit()

        cf.sync_wal()

        with db.begin_txn() as txn:
            assert txn.get(cf, b"sync_key") == b"sync_val"

    def test_sync_wal_empty_cf(self, db, cf):
        """Syncing the WAL on an empty column family succeeds."""
        cf.sync_wal()

    def test_sync_wal_after_batch(self, db, cf):
        """Syncing the WAL after a batch of writes keeps all data readable."""
        with db.begin_txn() as txn:
            for idx in range(100):
                txn.put(cf, f"batch_{idx}".encode(), f"val_{idx}".encode())
            txn.commit()

        cf.sync_wal()

        with db.begin_txn() as txn:
            for idx in range(100):
                assert txn.get(cf, f"batch_{idx}".encode()) == f"val_{idx}".encode()
1075+
1076+
1077+
class TestDbStats:
    """Tests for database-level statistics."""

    def test_db_stats_basic(self, db):
        """get_db_stats returns a DbStats with sane baseline values."""
        stats = db.get_db_stats()
        assert isinstance(stats, tidesdb.DbStats)
        assert stats.num_column_families >= 0
        assert stats.total_memory > 0
        assert stats.resolved_memory_limit > 0
        assert stats.memory_pressure_level >= 0
        assert stats.global_seq >= 0

    def test_db_stats_with_cf(self, db, cf):
        """An open column family is reflected in the stats count."""
        stats = db.get_db_stats()
        assert stats.num_column_families >= 1

    def test_db_stats_after_writes(self, db, cf):
        """Stats remain consistent after writing a batch of data."""
        with db.begin_txn() as txn:
            for idx in range(50):
                txn.put(cf, f"stats_key_{idx}".encode(), f"stats_val_{idx}".encode())
            txn.commit()

        stats = db.get_db_stats()
        assert stats.num_column_families >= 1
        assert stats.total_memtable_bytes >= 0
        assert stats.total_sstable_count >= 0
        assert stats.total_data_size_bytes >= 0

    def test_db_stats_all_fields_present(self, db):
        """Every DbStats field is populated with an int."""
        stats = db.get_db_stats()
        int_fields = (
            "num_column_families",
            "total_memory",
            "available_memory",
            "resolved_memory_limit",
            "memory_pressure_level",
            "flush_pending_count",
            "total_memtable_bytes",
            "total_immutable_count",
            "total_sstable_count",
            "total_data_size_bytes",
            "num_open_sstables",
            "global_seq",
            "txn_memory_bytes",
            "compaction_queue_size",
            "flush_queue_size",
        )
        for field_name in int_fields:
            assert isinstance(getattr(stats, field_name), int)

    def test_db_stats_after_purge(self, db, cf):
        """Both background queues read as empty once a purge completes."""
        with db.begin_txn() as txn:
            for idx in range(20):
                txn.put(cf, f"purge_stats_{idx}".encode(), f"val_{idx}".encode())
            txn.commit()

        db.purge()

        stats = db.get_db_stats()
        # Purge drains the flush and compaction queues before returning.
        assert stats.flush_queue_size == 0
        assert stats.compaction_queue_size == 0
1140+
1141+
9701142
if __name__ == "__main__":
9711143
pytest.main([__file__, "-v"])

0 commit comments

Comments
 (0)