-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathweight.go
More file actions
47 lines (40 loc) · 1.18 KB
/
weight.go
File metadata and controls
47 lines (40 loc) · 1.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
package classifier
import (
"math"
)
// WeightSchemeStrategy is the constructor signature shared by pluggable weight
// schemes: it receives a document as a map keyed by term, with a float64 value
// per term, and returns the WeightScheme that weighs terms against that
// document. Binary, BagOfWords, TermFrequency and LogNorm all have this
// signature.
type WeightSchemeStrategy func(doc map[string]float64) WeightScheme
// WeightScheme is the contract for term frequency weight schemes: a function
// that takes a single term and returns its weight as a float64.
type WeightScheme func(term string) float64
// Binary builds a presence/absence weight scheme for doc. The returned
// WeightScheme yields 1 for any term that is a key of doc, whatever value is
// stored for it, and 0 for every other term.
func Binary(doc map[string]float64) WeightScheme {
	return func(term string) float64 {
		_, present := doc[term]
		if !present {
			return 0
		}
		return 1
	}
}
// BagOfWords builds a raw-count weight scheme for doc. The returned
// WeightScheme yields the value stored in doc for the term, unchanged.
func BagOfWords(doc map[string]float64) WeightScheme {
	return func(term string) float64 {
		// A missing key reads as the float64 zero value, so unseen terms weigh 0.
		occurrences := doc[term]
		return occurrences
	}
}
// TermFrequency builds a damped term-frequency weight scheme for doc. The
// returned WeightScheme yields the square root of the value stored in doc for
// the term divided by len(doc), i.e. by the number of distinct terms (keys)
// in the document.
//
// Terms that are not in doc weigh 0. An empty or nil doc also yields 0 for
// every term; without that guard the division would be 0/0 and the scheme
// would return NaN.
func TermFrequency(doc map[string]float64) WeightScheme {
	return func(term string) float64 {
		// Read the size on every call so the weight tracks later changes to
		// doc, matching how doc[term] itself is read lazily.
		distinct := len(doc)
		if distinct == 0 {
			return 0
		}
		return math.Sqrt(doc[term] / float64(distinct))
	}
}
// LogNorm builds a logarithmically damped weight scheme for doc. The returned
// WeightScheme yields ln(1 + n), where n is the value stored in doc for the
// term. Terms that are absent, or whose stored value is zero or negative,
// weigh 0.
func LogNorm(doc map[string]float64) WeightScheme {
	return func(term string) float64 {
		switch n := doc[term]; {
		case n <= 0:
			// Covers missing terms too: a missing key reads as 0.
			return 0
		default:
			return math.Log1p(n)
		}
	}
}