-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_hoverpy_scikitlearn.py
More file actions
97 lines (73 loc) · 2.72 KB
/
test_hoverpy_scikitlearn.py
File metadata and controls
97 lines (73 loc) · 2.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import unittest
from hoverpy_scikitlearn import *
class test_hn(unittest.TestCase):
    """Checks that each feed fetched through getHNData has an on-topic story.

    Every test fetches one feed and passes as soon as a single story
    contains the expected keyword; it fails when no story does, which
    includes the case where the feed comes back empty.
    """

    def _assert_any_story_mentions(self, sub, keyword):
        """Fail unless at least one story from feed `sub` contains `keyword`.

        The check is a plain `in` test against each story, exactly as
        the individual tests performed it before they shared this helper.
        """
        stories = getHNData(sub=sub)
        self.assertTrue(
            any(keyword in story for story in stories),
            "no story from %r contains %r" % (sub, keyword),
        )

    # This test used to be named test_ask as well; the later definition
    # of test_ask replaced it in the class body, so it never ran.
    def test_jobs(self):
        """The "jobstories" feed has a story containing "hiring"."""
        self._assert_any_story_mentions("jobstories", "hiring")

    def test_show(self):
        """The "showstories" feed has a story containing "show"."""
        self._assert_any_story_mentions("showstories", "show")

    def test_ask(self):
        """The "askstories" feed has a story containing "ask"."""
        self._assert_any_story_mentions("askstories", "ask")
class test_reddit(unittest.TestCase):
    """Checks that data fetched through getRedditData mentions its own sub."""

    def generic_sub_tester(self, sub):
        """Return True if any story fetched for `sub` contains `sub`.

        Returns False when no story matches (including when nothing was
        fetched). Earlier this fell off the end and returned None; False
        is equally falsy, so assertTrue callers behave the same.
        """
        return any(sub in story for story in getRedditData(sub=sub))

    def test_linux(self):
        """Stories fetched for "linux" mention "linux"."""
        self.assertTrue(self.generic_sub_tester("linux"))

    # This test used to be a second test_linux, which replaced the real
    # "linux" test above in the class body so that one never ran.
    def test_python(self):
        """Stories fetched for "python" mention "python"."""
        self.assertTrue(self.generic_sub_tester("python"))

    def test_music(self):
        """Stories fetched for "music" mention "music"."""
        self.assertTrue(self.generic_sub_tester("music"))
class test_classifier(unittest.TestCase):
    """End-to-end check of a text classifier trained on mined titles."""

    def test_classifier(self):
        """Fit a multinomial naive Bayes model and verify sample predictions.

        The titles and targets returned by doMining() are turned into
        token counts, re-weighted with TfidfTransformer, and used to fit
        MultinomialNB. Four hand-written sentences are then classified
        and each predicted category, looked up in `subs`, must equal the
        expected pair.

        The test is reported as skipped when scipy cannot be imported.
        """
        try:
            import scipy  # noqa: F401 -- imported only to probe availability
        except ImportError:
            # Report a skip rather than returning, which would have been
            # counted as a pass even though nothing was checked.
            self.skipTest("scipy module not installed - quitting")

        titles, target = doMining()

        # Imported here, after the scipy probe, so a missing scientific
        # stack leads to a skip instead of an import error.
        from sklearn.feature_extraction.text import CountVectorizer
        from sklearn.feature_extraction.text import TfidfTransformer
        from sklearn.naive_bayes import MultinomialNB

        count_vect = CountVectorizer()
        X_train_counts = count_vect.fit_transform(titles)

        tfidf_transformer = TfidfTransformer()
        X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)

        clf = MultinomialNB().fit(X_train_tfidf, target)

        def predict(sentences, answers):
            """Classify `sentences` and assert each result equals its answer."""
            # Reuse the fitted vectorizer/transformer: transform(), not
            # fit_transform(), so the training vocabulary is kept.
            X_new_counts = count_vect.transform(sentences)
            X_new_tfidf = tfidf_transformer.transform(X_new_counts)

            predicted = clf.predict(X_new_tfidf)

            for doc, category, answer in zip(sentences, predicted, answers):
                # `subs` is not defined in this module; presumably it comes
                # from the hoverpy_scikitlearn star import -- TODO confirm.
                # The sentence is passed as the message so a failure names
                # the input that was misclassified.
                self.assertEqual(subs[category], answer, doc)

        tests = [
            "powershell and openssl compatability testing",
            "compiling source code on ubuntu",
            "wifi drivers keep crashing",
            "training day was a great movie with a legendary director",
        ]

        answers = [
            ("reddit", "linux"),
            ("reddit", "linux"),
            ("reddit", "linux"),
            ("reddit", "movies"),
        ]

        predict(tests, answers)
# Script entry point: when this file is executed directly (not imported),
# hand control to unittest's command-line runner.
if __name__ == "__main__":
    unittest.main()