-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathrelation_extractor.py
More file actions
52 lines (46 loc) · 1.8 KB
/
relation_extractor.py
File metadata and controls
52 lines (46 loc) · 1.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import os
from collections import defaultdict
import re
import configure
# retrieve the regular expression for each relation from 'relation_patterns.txt'
def get_rel():
    """Load the relation regular expressions from 'relation_patterns.txt'.

    The file is opened from the directory ``configure.RESOURCE_PATH``. Every
    line that is not skipped has the form ``<name>::<body>``:

    * lines starting with ``#`` or with a whitespace character (this covers
      blank lines) are skipped;
    * lines starting with ``@`` define a variable: ``<name>`` (the text
      before the separator, kept as written) maps to the stripped ``<body>``;
    * any other line adds the stripped ``<body>`` as a pattern of the
      relation ``<name>``, after replacing every occurrence of each variable
      defined so far (in definition order) with its body.

    Only the first ``::`` on a line separates name from body, so a body may
    itself contain ``::`` without being truncated.

    Returns:
        A ``defaultdict(list)`` mapping each relation name to its pattern
        strings, in file order.

    Raises:
        IndexError: if a non-skipped line contains no ``::`` separator.
    """
    # variables: trigger phrases substituted into the regular expressions
    variables = {}
    # rel_map: the regular expressions collected for each relation
    rel_map = defaultdict(list)
    path = os.path.join(configure.RESOURCE_PATH, 'relation_patterns.txt')
    with open(path, 'r') as patterns:
        for line in patterns:
            # Lines read from a file are never empty; blank lines consist of
            # a newline and are caught by the leading-whitespace test.
            if line.startswith('#') or re.search(r'^\s', line):
                continue
            # Split once, on the first separator only, so that '::' inside
            # the body is preserved.
            fields = line.split('::', 1)
            name, body = fields[0], fields[1].strip()
            if line.startswith('@'):
                variables[name] = body
                continue
            for var, expansion in variables.items():
                body = body.replace(var, expansion)
            rel_map[name.strip()].append(body)
    return rel_map
def _find_relation(rel_map, rep, entity_num):
    """Return the name of the first relation with a pattern matching rep.

    Keys of ``rel_map`` have the form ``<relation>@<entity count>``; only keys
    whose count equals ``entity_num`` are considered. Returns ``None`` when no
    pattern matches.
    """
    for key, patterns in rel_map.items():
        parts = key.split('@')
        if int(parts[1]) != entity_num:
            continue
        for pattern in patterns:
            if re.search(pattern, rep):
                print('relation found:\t', key, pattern)
                return parts[0]
    return None


def extract_relation(snippets):
    """Annotate each snippet, in place, with the relation its text expresses.

    For every snippet that has an ``'entities'`` entry, the patterns returned
    by ``get_rel()`` are searched against ``snippet['representation']``,
    restricted to relations declared for exactly ``len(snippet['entities'])``
    entities. The first matching relation (in ``get_rel()`` order) is stored
    under ``snippet['relation']``. When nothing matches and the snippet has no
    ``'relation'`` entry yet, ``'N/A'`` is stored.

    Snippets without an ``'entities'`` entry are left untouched.

    Args:
        snippets: iterable of dict-like snippets; modified in place.

    Returns:
        None.

    Raises:
        KeyError: if a snippet has ``'entities'`` but no ``'representation'``.
    """
    rel_map = get_rel()
    for snippet in snippets:
        if 'entities' not in snippet:
            continue
        # number of treatment entities in the snippet
        entity_num = len(snippet['entities'])
        rep = snippet['representation']
        print('processing:\t', rep)
        # The match result is kept in a local rather than read back from the
        # snippet, so a 'relation' value already present on the snippet cannot
        # cut the search short before every candidate has been tried.
        relation = _find_relation(rel_map, rep, entity_num)
        if relation is not None:
            snippet['relation'] = relation
        elif 'relation' not in snippet:
            snippet['relation'] = 'N/A'
def run(snippets):
    """Entry point: run relation extraction over ``snippets`` in place.

    Delegates to ``extract_relation``; nothing is returned.
    """
    extract_relation(snippets)