-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdatarules.json
More file actions
40 lines (40 loc) · 1.33 KB
/
datarules.json
File metadata and controls
40 lines (40 loc) · 1.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
{
"rules": {
"forum": {
"raw_expr": true,
"to_str": false,
"eval": "urllib.splithost(urllib.splittype(response.url)[1])[0]"
},
"author": {
"raw_expr": false,
"to_str": false,
"eval": "find(name='div', attrs={'class': 'user-details'}).a.string.strip()"
},
"url": {
"raw_expr": true,
"to_str": false,
"eval": "response.url"
},
"title": {
"raw_expr": false,
"to_str": false,
"eval": "find(name='a', attrs={'class': 'question-hyperlink'}).string.strip()"
},
"datetime": {
"raw_expr": true,
"to_str": false,
"eval": "datetime.strptime(soup.find(name='div',attrs={'class': ['module', 'question-stats']}).find_all(name='p',attrs={'class': 'label-key'})[1]['title'].strip()[:-1], '%Y-%m-%d %H:%M:%S')"
},
"content": {
"raw_expr": false,
"to_str": true,
"eval": "find(name='div', attrs={'class': 'post-text', 'itemprop': 'text'})"
},
"question_id": {
"raw_expr": true,
"to_str": false,
"eval": "int(re.search('(?<=(questions))/(\\d+)/',response.url).group(2))"
}
},
"mode": "BeautifulSoup"
}