This repository was archived by the owner on Jan 6, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbenchmark.py
More file actions
executable file
·81 lines (65 loc) · 2.21 KB
/
benchmark.py
File metadata and controls
executable file
·81 lines (65 loc) · 2.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python3
from enum import Enum
from pathlib import Path
from timeit import timeit
import dask.dataframe as dd
import subprocess
import typer
class Library(Enum):
    """ConceptNet backends that this benchmark knows how to time.

    Each member's value is the same string as its name.
    """

    conceptnet5 = "conceptnet5"
    conceptnet_rocks = "conceptnet_rocks"
# Passed as the ``limit`` argument to every ``af.lookup()`` call made by ``query``.
LIMIT = 10000


def _sorted_edge(edge):
    """Return a copy of *edge* whose string form does not depend on ordering.

    The edge's keys are sorted, every dict under ``"sources"`` gets its keys
    sorted, and the sources themselves are ordered by their string form.
    """
    result = dict(sorted(edge.items()))
    result["sources"] = sorted(
        (dict(sorted(source.items())) for source in result["sources"]),
        key=str,
    )
    return result


def query(af, items: dd.DataFrame, verbose: bool = False):
    """Look up every URI in ``items["uri"]`` and print how many edges came back.

    Args:
        af: Object exposing ``lookup(uri, limit=...)``; each call must return
            a sized iterable of edge dicts.
        items: Table whose ``"uri"`` column holds the URIs to look up.
        verbose: When true, also print every returned edge, one per line, in
            a canonical sorted order before the final count.

    Returns:
        None. Results are written to standard output.
    """
    edge_count = 0
    edges_strs = []

    for item in items["uri"]:
        edges = af.lookup(item, limit=LIMIT)
        if verbose:
            # Normalise each edge before stringifying so that the printed
            # lines do not depend on the key/source order a backend returns.
            edges_strs.extend(str(_sorted_edge(edge)) for edge in edges)
        edge_count += len(edges)

    if verbose:
        print("Edges:")
        for edge_str in sorted(edges_strs):
            print(edge_str)
    print(f"Edge count: {edge_count}")
def profile(csv_path: Path, library: Library, verbose: bool):
    """Time a single ``query`` pass over the URIs listed in *csv_path*.

    Args:
        csv_path: CSV file loaded with dask; ``query`` reads its ``"uri"``
            column. A leading ``~`` is expanded.
        library: Backend whose ``AssertionFinder`` is benchmarked.
        verbose: Forwarded unchanged to ``query``.

    Returns:
        The value reported by ``timeit`` for one execution of ``query``.

    Raises:
        ValueError: If *library* is not one of the handled ``Library`` members.
    """
    # The backend imports are local to each branch, so only the selected
    # backend's package is imported.
    if library == Library.conceptnet5:
        from conceptnet5.db.query import AssertionFinder

        # Run the helper script that sits next to this file before creating
        # the finder. Its exit status is not checked (best effort).
        # NOTE(review): going by its name it drops the PostgreSQL cache; if it
        # fails the timing below is presumably taken against a warm cache.
        script_dir = Path(__file__).resolve().parent
        subprocess.call(["sudo", script_dir / "postgresql_clear_cache.sh"])
        af = AssertionFinder()
    elif library == Library.conceptnet_rocks:
        from conceptnet_rocks import AssertionFinder

        af = AssertionFinder(close_stdout_and_stderr=True)
        af.clear_cache()
    else:
        raise ValueError(f"Unsupported library: {library}")

    # keep_default_na=False is forwarded to the CSV reader.
    # NOTE(review): dask loads lazily, so the CSV is presumably parsed inside
    # the timed region when ``query`` iterates the column - confirm intended.
    items = dd.read_csv(csv_path.expanduser(), keep_default_na=False)

    # Exactly one timed run: setup and cache clearing above stay outside the
    # measurement.
    return timeit(
        lambda: query(af=af, items=items, verbose=verbose),
        number=1,
    )
def main(csv_path: Path, library: Library, verbose: bool = False, skip_profile: bool = False):
    """Benchmark edge lookups for the URIs listed in a CSV file.

    The benchmark always runs. skip_profile only suppresses printing of the
    measured time; verbose additionally prints every edge that was returned.
    """
    profile_result = profile(csv_path=csv_path, library=library, verbose=verbose)
    if not skip_profile:
        print(profile_result)
if __name__ == "__main__":
    # Run only when executed as a script: typer turns main's parameters into
    # command-line arguments and options.
    typer.run(main)