Browse Source

修复字幕时间轴比较的问题

Signed-off-by: 716 <[email protected]>
716 4 years ago
parent
commit
97dd034ba0

+ 1 - 0
.gitignore

@@ -41,3 +41,4 @@
 /cmd/chinesesubfinder/Logs
 /cmd/chinesesubfinder/settings.db
 /TestData/FixTimeline/test
+/internal/pkg/sub_timeline_fixer/bar.html

+ 3 - 3
go.mod

@@ -13,10 +13,11 @@ require (
 	github.com/bodgit/sevenzip v1.1.0
 	github.com/fsnotify/fsnotify v1.4.9 // indirect
 	github.com/go-creed/sat v1.0.3
+	github.com/go-echarts/go-echarts/v2 v2.2.4
 	github.com/go-resty/resty/v2 v2.6.0
 	github.com/go-rod/rod v0.101.7
 	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
-	github.com/james-bowman/nlp v0.0.0-20210511120306-26d441fa0ded // indirect
+	github.com/james-bowman/nlp v0.0.0-20210511120306-26d441fa0ded
 	github.com/james-bowman/sparse v0.0.0-20210729090128-1e6c7dd483e9 // indirect
 	github.com/jinzhu/now v1.1.2
 	github.com/jonboulle/clockwork v0.2.2 // indirect
@@ -32,7 +33,6 @@ require (
 	github.com/nzlov/chardet v0.0.0-20190815145004-022cbcf483f9
 	github.com/panjf2000/ants/v2 v2.4.5
 	github.com/pelletier/go-toml v1.8.1 // indirect
-	github.com/pkg/errors v0.9.1 // indirect
 	github.com/robfig/cron/v3 v3.0.0
 	github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca
 	github.com/sirupsen/logrus v1.8.1
@@ -49,7 +49,7 @@ require (
 	golang.org/x/net v0.0.0-20210614182718-04defd469f4e
 	golang.org/x/sys v0.0.0-20210616094352-59db8d763f22 // indirect
 	golang.org/x/text v0.3.6
-	gonum.org/v1/gonum v0.9.3 // indirect
+	gonum.org/v1/gonum v0.9.3
 	gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
 	gopkg.in/errgo.v2 v2.1.0
 	gopkg.in/ini.v1 v1.62.0 // indirect

+ 5 - 1
go.sum

@@ -96,6 +96,8 @@ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMo
 github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
 github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
 github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
+github.com/go-echarts/go-echarts/v2 v2.2.4 h1:SKJpdyNIyD65XjbUZjzg6SwccTNXEgmh+PlaO23g2H0=
+github.com/go-echarts/go-echarts/v2 v2.2.4/go.mod h1:6TOomEztzGDVDkOSCFBq3ed7xOYfbOqhaBzD0YV771A=
 github.com/go-fonts/dejavu v0.1.0/go.mod h1:4Wt4I4OU2Nq9asgDCteaAaWZOV24E+0/Pwo0gppep4g=
 github.com/go-fonts/latin-modern v0.2.0/go.mod h1:rQVLdDMK+mK1xscDwsqM5J8U2jrRa3T0ecnM9pNujks=
 github.com/go-fonts/liberation v0.1.1/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY=
@@ -333,6 +335,7 @@ github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
 github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
 github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
+github.com/stretchr/testify v1.6.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s=
@@ -418,8 +421,8 @@ golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
 golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
 golang.org/x/mod v0.1.1-0.20191209134235-331c550502dd/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
-golang.org/x/mod v0.3.0 h1:RM4zey1++hCTbCVQfnWeKs9/IEsaBLA8vTkd0WVtmH4=
 golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.3.1-0.20200828183125-ce943fd02449 h1:xUIPaMhvROX9dhPvRCenIJtU78+lbEenGbgqB5hfHCQ=
 golang.org/x/mod v0.3.1-0.20200828183125-ce943fd02449/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -541,6 +544,7 @@ gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJ
 gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0=
 gonum.org/v1/gonum v0.9.3 h1:DnoIG+QAMaF5NvxnGe/oKsgKcAc6PcUyl8q0VetfQ8s=
 gonum.org/v1/gonum v0.9.3/go.mod h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0=
+gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0 h1:OE9mWmgKkjJyEmDAAtGMPjXu+YNeGvK9VTSHY6+Qihc=
 gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw=
 gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc=
 gonum.org/v1/plot v0.9.0/go.mod h1:3Pcqqmp6RHvJI72kgb8fThyUnav364FOsdDo2aGW5lY=

+ 3 - 2
internal/pkg/emby_helper/emby_test.go

@@ -38,7 +38,7 @@ func TestEmbyHelper_GetItemVideoInfo(t *testing.T) {
 	// 96564 -- The Bad Batch - S01E11
 	// R&M S05E10  2 org english, 5 简英 145499
 	// 基地 S01E03 166840
-	videoInfo, err := em.GetItemVideoInfo("166840")
+	videoInfo, err := em.GetItemVideoInfo("145499")
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -86,8 +86,9 @@ func TestEmbyApi_GetSubFileData(t *testing.T) {
 	// R&M S05E10  2 org english, 5 简英					"145499", "c4678509adb72a8b5034bdac2f1fccde", "5", ".ass"
 	// 基地 S01E03		2=eng 	6=chi 	45=简英			"166840", "d6c68ec6097aeceb9f5c1d82add66213", "2", ".ass"
 
+	subFileData, err := em.GetSubFileData("145499", "c4678509adb72a8b5034bdac2f1fccde", "4", ".ass")
 	//subFileData, err := em.GetSubFileData("145499", "c4678509adb72a8b5034bdac2f1fccde", "5", ".ass")
-	subFileData, err := em.GetSubFileData("166840", "d6c68ec6097aeceb9f5c1d82add66213", "45", ".ass")
+	//subFileData, err := em.GetSubFileData("166840", "d6c68ec6097aeceb9f5c1d82add66213", "45", ".ass")
 	if err != nil {
 		t.Fatal(err)
 	}

+ 41 - 17
internal/pkg/sub_timeline_fixer/fixer.go

@@ -7,9 +7,13 @@ import (
 	"github.com/allanpk716/ChineseSubFinder/internal/logic/sub_parser/ass"
 	"github.com/allanpk716/ChineseSubFinder/internal/logic/sub_parser/srt"
 	"github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_parser_hub"
+	"github.com/go-echarts/go-echarts/v2/charts"
+	"github.com/go-echarts/go-echarts/v2/opts"
 	"github.com/james-bowman/nlp"
 	"github.com/james-bowman/nlp/measures/pairwise"
 	"gonum.org/v1/gonum/mat"
+	"math/rand"
+	"os"
 
 	"strings"
 	"time"
@@ -79,14 +83,9 @@ func GetOffsetTime(baseEngSubFPath, srcSubFPath string) (time.Duration, error) {
 		return 0, nil
 	}
 
-	print(infoSrc)
-
 	// 构建基准语料库,目前阶段只需要考虑是 En 的就行了
 	var baseCorpus = make([]string, 0)
 	for _, oneDialogueEx := range infoBase.DialoguesEx {
-		if oneDialogueEx.EnLine == "" {
-			continue
-		}
 		baseCorpus = append(baseCorpus, oneDialogueEx.EnLine)
 	}
 	// 初始化
@@ -143,7 +142,7 @@ func GetOffsetTime(baseEngSubFPath, srcSubFPath string) (time.Duration, error) {
 
 		if sc.Add(baseIndex, srcIndex) == false {
 			sc.Clear()
-			continue
+			sc.Add(baseIndex, srcIndex)
 		}
 		if sc.Check() == false {
 			continue
@@ -161,7 +160,6 @@ func GetOffsetTime(baseEngSubFPath, srcSubFPath string) (time.Duration, error) {
 		//	baseIndex, infoBase.DialoguesEx[baseIndex].StartTime, infoBase.DialoguesEx[baseIndex].EndTime, baseCorpus[baseIndex],
 		//	srcIndex, srcOneDialogueEx.StartTime, srcOneDialogueEx.EndTime, srcOneDialogueEx.EnLine))
 	}
-	println("---------------------------------------------")
 	timeFormat := ""
 	if infoBase.Ext == common.SubExtASS || infoBase.Ext == common.SubExtSSA {
 		timeFormat = timeFormatAss
@@ -169,31 +167,32 @@ func GetOffsetTime(baseEngSubFPath, srcSubFPath string) (time.Duration, error) {
 		timeFormat = timeFormatSrt
 	}
 	// 上面找出了连续匹配 maxCompareDialogue:N 次的字幕语句块
+	// 求出平均时间偏移
 	for _, matchIndexItem := range matchIndexList {
 
 		for i := 0; i < maxCompareDialogue; i++ {
-
+			// 这里会统计连续的这 5 句话的时间差
 			tmpBaseIndex := matchIndexItem.BaseNowIndex + i
 			tmpSrcIndex := matchIndexItem.SrcNowIndex + i
 
-			srtTimeStart, err := time.Parse(timeFormat, infoSrc.DialoguesEx[tmpSrcIndex].StartTime)
+			baseTimeStart, err := time.Parse(timeFormat, infoBase.DialoguesEx[tmpBaseIndex].StartTime)
 			if err != nil {
-				println("srtTimeStart", err)
+				println("baseTimeStart", err)
 				continue
 			}
-			srtTimeEnd, err := time.Parse(timeFormat, infoSrc.DialoguesEx[tmpSrcIndex].EndTime)
+			baseTimeEnd, err := time.Parse(timeFormat, infoBase.DialoguesEx[tmpBaseIndex].EndTime)
 			if err != nil {
-				println("srtTimeEnd", err)
+				println("baseTimeEnd", err)
 				continue
 			}
-			baseTimeStart, err := time.Parse(timeFormat, infoBase.DialoguesEx[tmpBaseIndex].StartTime)
+			srtTimeStart, err := time.Parse(timeFormat, infoSrc.DialoguesEx[tmpSrcIndex].StartTime)
 			if err != nil {
-				println("baseTimeStart", err)
+				println("srtTimeStart", err)
 				continue
 			}
-			baseTimeEnd, err := time.Parse(timeFormat, infoBase.DialoguesEx[tmpBaseIndex].EndTime)
+			srtTimeEnd, err := time.Parse(timeFormat, infoSrc.DialoguesEx[tmpSrcIndex].EndTime)
 			if err != nil {
-				println("baseTimeEnd", err)
+				println("srtTimeEnd", err)
 				continue
 			}
 
@@ -202,14 +201,39 @@ func GetOffsetTime(baseEngSubFPath, srcSubFPath string) (time.Duration, error) {
 
 			println(fmt.Sprintf("Diff Start-End: %s - %s Base[%d] %s-%s '%s' <--> Src[%d] %s-%s '%s'",
 				TimeDiffStart, TimeDiffEnd,
-				tmpBaseIndex, infoBase.DialoguesEx[tmpBaseIndex].StartTime, infoBase.DialoguesEx[tmpBaseIndex].EndTime, baseCorpus[tmpBaseIndex],
+				tmpBaseIndex, infoBase.DialoguesEx[tmpBaseIndex].StartTime, infoBase.DialoguesEx[tmpBaseIndex].EndTime, infoBase.DialoguesEx[tmpBaseIndex].EnLine,
 				tmpSrcIndex, infoSrc.DialoguesEx[tmpSrcIndex].StartTime, infoSrc.DialoguesEx[tmpSrcIndex].EndTime, infoSrc.DialoguesEx[tmpSrcIndex].EnLine))
 		}
 		println("---------------------------------------------")
 	}
 
+	// 1.New 一个条形图对象
+	bar := charts.NewBar()
+	// 2.设置 标题 和 子标题
+	bar.SetGlobalOptions(charts.WithTitleOpts(opts.Title{
+		Title:    "My first bar chart generated by go-echarts",
+		Subtitle: "It's extremely easy to use, right?",
+	}))
+
+	// 3.设置 数据组
+	bar.SetXAxis([]string{"Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"}).
+		AddSeries("Category A", generateBarItems()).
+		AddSeries("Category B", generateBarItems())
+	// 4.绘图 生成html
+	f, _ := os.Create("bar.html")
+	bar.Render(f)
+
 	return 0, nil
 }
 
+// generateBarItems returns a slice of seven opts.BarData entries, each with Value set to a pseudo-random int from rand.Intn(300), i.e. in [0, 300).
+func generateBarItems() []opts.BarData {
+	items := make([]opts.BarData, 0)
+	for i := 0; i < 7; i++ {
+		items = append(items, opts.BarData{Value: rand.Intn(300)})
+	}
+	return items
+}
+
 const timeFormatAss = "15:04:05.00"
 const timeFormatSrt = "15:04:05,000"

+ 6 - 2
internal/pkg/sub_timeline_fixer/fixer_test.go

@@ -47,11 +47,15 @@ func TestGetOffsetTime(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	enSubFile := path.Join(testRootDir, "R&M S05E01 - English.srt")
-	ch_enSubFile := path.Join(testRootDir, "R&M S05E01 - 简英.srt")
+	//enSubFile := path.Join(testRootDir, "R&M S05E01 - English.srt")
+	//ch_enSubFile := path.Join(testRootDir, "R&M S05E01 - 简英.srt")
 
 	//enSubFile := path.Join(testRootDir, "R&M S05E10 - English.ass")
 	//ch_enSubFile := path.Join(testRootDir, "R&M S05E10 - 简英.ass")
+	//ch_enSubFile := path.Join(testRootDir, "R&M S05E10 - 简英-shooter.ass")
+
+	enSubFile := path.Join(testRootDir, "基地 S01E03 - English.ass")
+	ch_enSubFile := path.Join(testRootDir, "基地 S01E03 - 简英.ass")
 
 	time, err := GetOffsetTime(enSubFile, ch_enSubFile)
 	if err != nil {