Browse Source

新增中文的 stopwords

Signed-off-by: allan716 <[email protected]>
allan716 3 years ago
parent
commit
bce233995b

+ 3 - 2
pkg/sub_timeline_fixer/fixer_test.go

@@ -1,10 +1,11 @@
 package sub_timeline_fixer
 
 import (
+	"testing"
+
 	"github.com/james-bowman/nlp"
 	"github.com/james-bowman/nlp/measures/pairwise"
 	"gonum.org/v1/gonum/mat"
-	"testing"
 )
 
 func TestStopWordCounter(t *testing.T) {
@@ -38,7 +39,7 @@ func TestTFIDF(t *testing.T) {
 
 	query := "the brown fox ran around the dog"
 
-	vectoriser := nlp.NewCountVectoriser(StopWords...)
+	vectoriser := nlp.NewCountVectoriser(EnStopWords...)
 	transformer := nlp.NewTfidfTransformer()
 
 	// set k (the number of dimensions following truncation) to 4

File diff suppressed because it is too large
+ 0 - 0
pkg/sub_timeline_fixer/stop_words.go


+ 2 - 1
pkg/sub_timeline_fixer/tfidf.go

@@ -3,13 +3,14 @@ package sub_timeline_fixer
 import (
 	"errors"
 	"fmt"
+
 	"github.com/james-bowman/nlp"
 	"gonum.org/v1/gonum/mat"
 )
 
 // NewTFIDF 初始化 TF-IDF
 func NewTFIDF(testCorpus []string) (*nlp.Pipeline, mat.Matrix, error) {
-	vectors := nlp.NewCountVectoriser(StopWords...)
+	vectors := nlp.NewCountVectoriser(EnStopWords...)
 	transformer := nlp.NewTfidfTransformer()
 	// set k (the number of dimensions following truncation) to 4
 	reducer := nlp.NewTruncatedSVD(4)

Some files were not shown because too many files changed in this diff