tfidf.go 764 B

12345678910111213141516171819202122232425
  1. package sub_timeline_fixer
  2. import (
  3. "errors"
  4. "fmt"
  5. "github.com/james-bowman/nlp"
  6. "gonum.org/v1/gonum/mat"
  7. )
  8. // NewTFIDF 初始化 TF-IDF
  9. func NewTFIDF(testCorpus []string) (*nlp.Pipeline, mat.Matrix, error) {
  10. vectors := nlp.NewCountVectoriser(EnStopWords...)
  11. transformer := nlp.NewTfidfTransformer()
  12. // set k (the number of dimensions following truncation) to 4
  13. reducer := nlp.NewTruncatedSVD(4)
  14. lsiPipeline := nlp.NewPipeline(vectors, transformer, reducer)
  15. // Transform the corpus into an LSI fitting the model to the documents in the process
  16. lsi, err := lsiPipeline.FitTransform(testCorpus...)
  17. if err != nil {
  18. return nil, lsi, errors.New(fmt.Sprintf("Failed to process testCorpus documents because %v", err))
  19. }
  20. return lsiPipeline, lsi, nil
  21. }