Apq
/
ChineseSubFinder
огледало од https://github.com/allanpk716/ChineseSubFinder.git


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
							package sub_timeline_fixer

import (
	"fmt"
	"github.com/allanpk716/ChineseSubFinder/internal/logic/sub_parser/ass"
	"github.com/allanpk716/ChineseSubFinder/internal/logic/sub_parser/srt"
	"github.com/allanpk716/ChineseSubFinder/internal/pkg"
	"github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_parser_hub"
	"github.com/james-bowman/nlp"
	"github.com/james-bowman/nlp/measures/pairwise"
	"gonum.org/v1/gonum/mat"
	"path"
	"strings"
	"testing"
)

func TestStopWordCounter(t *testing.T) {

	testDataPath := "../../../TestData/FixTimeline"
	testRootDir, err := pkg.CopyTestData(testDataPath)
	if err != nil {
		t.Fatal(err)
	}

	subParserHub := sub_parser_hub.NewSubParserHub(ass.NewParser(), srt.NewParser())

	bFind, info, err := subParserHub.DetermineFileTypeFromFile(path.Join(testRootDir, "R&M S05E10 - English.srt"))
	if err != nil {
		t.Fatal(err)
	}
	if bFind == false {
		t.Fatal("not match sub types")
	}

	allString := strings.Join(info.OtherLines, " ")

	stopWords := StopWordCounter(strings.ToLower(allString), 5)

	print(len(stopWords))
	println(info.Name)
}

func TestGetOffsetTime(t *testing.T) {
	testDataPath := "../../../TestData/FixTimeline"
	testRootDir, err := pkg.CopyTestData(testDataPath)
	if err != nil {
		t.Fatal(err)
	}

	//enSubFile := path.Join(testRootDir, "R&M S05E01 - English.srt")
	//ch_enSubFile := path.Join(testRootDir, "R&M S05E01 - 简英.srt")

	//enSubFile := path.Join(testRootDir, "R&M S05E10 - English.ass")
	//ch_enSubFile := path.Join(testRootDir, "R&M S05E10 - 简英.ass")
	//ch_enSubFile := path.Join(testRootDir, "R&M S05E10 - 简英-shooter.ass")

	enSubFile := path.Join(testRootDir, "基地 S01E03 - English.ass")
	ch_enSubFile := path.Join(testRootDir, "基地 S01E03 - 简英.ass")

	time, err := GetOffsetTime(enSubFile, ch_enSubFile)
	if err != nil {
		return
	}

	print(time)
}

func TestTFIDF(t *testing.T) {
	testCorpus := []string{
		"The quick brown fox jumped over the lazy dog",
		"hey diddle diddle, the cat and the fiddle",
		"the cow jumped over the moon",
		"the little dog laughed to see such fun",
		"and the dish ran away with the spoon",
	}

	query := "the brown fox ran around the dog"

	vectoriser := nlp.NewCountVectoriser(StopWords...)
	transformer := nlp.NewTfidfTransformer()

	// set k (the number of dimensions following truncation) to 4
	reducer := nlp.NewTruncatedSVD(4)

	lsiPipeline := nlp.NewPipeline(vectoriser, transformer, reducer)

	// Transform the corpus into an LSI fitting the model to the documents in the process
	lsi, err := lsiPipeline.FitTransform(testCorpus...)
	if err != nil {
		fmt.Printf("Failed to process documents because %v", err)
		return
	}

	// run the query through the same pipeline that was fitted to the corpus and
	// to project it into the same dimensional space
	queryVector, err := lsiPipeline.Transform(query)
	if err != nil {
		fmt.Printf("Failed to process documents because %v", err)
		return
	}

	// iterate over document feature vectors (columns) in the LSI matrix and compare
	// with the query vector for similarity.  Similarity is determined by the difference
	// between the angles of the vectors known as the cosine similarity
	highestSimilarity := -1.0
	var matched int
	_, docs := lsi.Dims()
	for i := 0; i < docs; i++ {
		similarity := pairwise.CosineSimilarity(queryVector.(mat.ColViewer).ColView(0), lsi.(mat.ColViewer).ColView(i))
		if similarity > highestSimilarity {
			matched = i
			highestSimilarity = similarity
		}
	}

	fmt.Printf("Matched '%s'", testCorpus[matched])
	// Output: Matched 'The quick brown fox jumped over the lazy dog'
}