converter.go 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591
  1. //Author:TruthHun
  2. //Email:[email protected]
  3. //Date:2018-01-21
  4. package converter
  5. import (
  6. "fmt"
  7. "io/ioutil"
  8. "os"
  9. "path/filepath"
  10. "strings"
  11. "time"
  12. "os/exec"
  13. "errors"
  14. "github.com/lifei6671/mindoc/utils/filetil"
  15. "github.com/lifei6671/mindoc/utils/ziptil"
  16. "github.com/lifei6671/mindoc/utils/cryptil"
  17. "sync"
  18. )
// Converter turns a directory of documents into e-book files as described
// by a Config.
type Converter struct {
	BasePath       string // directory holding the source documents; NewConverter sets it to the directory of the config file
	OutputPath     string // directory that receives content.epub and the output folder; NewConverter leaves it empty
	Config         Config // parsed export configuration
	Debug          bool   // when true, Convert keeps the intermediate files it generates
	GeneratedCover string // set to "titlepage.xhtml" once generateTitlePage has written the cover page
}
// Toc is a single entry of the table of contents.
type Toc struct {
	Id    int    `json:"id"`    // identifier of this entry; child entries point at it through Pid
	Link  string `json:"link"`  // document the entry links to
	Pid   int    `json:"pid"`   // Id of the parent entry; generation starts from entries whose Pid is 0
	Title string `json:"title"` // text displayed for the entry
}
// Config mirrors the structure of the config.json file.
type Config struct {
	Charset      string   `json:"charset"`       // character encoding; NewConverter falls back to "utf-8" when empty
	Cover        string   `json:"cover"`         // cover image, or a cover HTML file
	Timestamp    string   `json:"date"`          // date and time such as "2018-01-01 12:12:21"; conceptually a time.Time, kept as a plain string for simplicity
	Description  string   `json:"description"`   // summary of the document
	Footer       string   `json:"footer"`        // footer template for PDF output
	Header       string   `json:"header"`        // header template for PDF output
	Identifier   string   `json:"identifier"`    // UUID of the document; may be left empty
	Language     string   `json:"language"`      // language code such as zh, en, zh-CN or en-US
	Creator      string   `json:"creator"`       // author
	Publisher    string   `json:"publisher"`     // publishing organisation
	Contributor  string   `json:"contributor"`   // contributor; the original note reads "same as Publisher"
	Title        string   `json:"title"`         // document title
	Format       []string `json:"format"`        // export formats; Convert recognises pdf, epub, mobi and docx
	FontSize     string   `json:"font_size"`     // default font size for PDF export
	PaperSize    string   `json:"paper_size"`    // page size
	MarginLeft   string   `json:"margin_left"`   // left page margin, digits only (original note: defaults to 72pt — presumably calibre's default, verify)
	MarginRight  string   `json:"margin_right"`  // right page margin, digits only (original note: defaults to 72pt — verify)
	MarginTop    string   `json:"margin_top"`    // top page margin, digits only (original note: defaults to 72pt — verify)
	MarginBottom string   `json:"margin_bottom"` // bottom page margin, digits only (original note: defaults to 72pt — verify)
	More         []string `json:"more"`          // extra PDF export options, see https://manual.calibre-ebook.com/generated/en/ebook-convert.html#pdf-output-options
	Toc          []Toc    `json:"toc"`           // table of contents

	// Not read from JSON and not meant to be assigned by callers:
	// getNavPoint appends every link it emits, in navigation order.
	Order []string `json:"-"`
}
var (
	output       = "output"        // name of the folder that receives the exported documents
	ebookConvert = "ebook-convert" // command executed to convert content.epub into other formats
)
  63. // 接口文档 https://manual.calibre-ebook.com/generated/en/ebook-convert.html#table-of-contents
  64. //根据json配置文件,创建文档转化对象
  65. func NewConverter(configFile string, debug ...bool) (converter *Converter, err error) {
  66. var (
  67. cfg Config
  68. basepath string
  69. db bool
  70. )
  71. if len(debug) > 0 {
  72. db = debug[0]
  73. }
  74. if cfg, err = parseConfig(configFile); err == nil {
  75. if basepath, err = filepath.Abs(filepath.Dir(configFile)); err == nil {
  76. //设置默认值
  77. if len(cfg.Timestamp) == 0 {
  78. cfg.Timestamp = time.Now().Format("2006-01-02 15:04:05")
  79. }
  80. if len(cfg.Charset) == 0 {
  81. cfg.Charset = "utf-8"
  82. }
  83. converter = &Converter{
  84. Config: cfg,
  85. BasePath: basepath,
  86. Debug: db,
  87. }
  88. }
  89. }
  90. return
  91. }
  92. //执行文档转换
  93. func (this *Converter) Convert() (err error) {
  94. if !this.Debug { //调试模式下不删除生成的文件
  95. defer this.converterDefer() //最后移除创建的多余而文件
  96. }
  97. if err = this.generateMimeType(); err != nil {
  98. return
  99. }
  100. if err = this.generateMetaInfo(); err != nil {
  101. return
  102. }
  103. if err = this.generateTocNcx(); err != nil { //生成目录
  104. return
  105. }
  106. if err = this.generateSummary(); err != nil { //生成文档内目录
  107. return
  108. }
  109. if err = this.generateTitlePage(); err != nil { //生成封面
  110. return
  111. }
  112. if err = this.generateContentOpf(); err != nil { //这个必须是generate*系列方法的最后一个调用
  113. return
  114. }
  115. //将当前文件夹下的所有文件压缩成zip包,然后直接改名成content.epub
  116. f := filepath.Join(this.OutputPath, "content.epub")
  117. os.Remove(f) //如果原文件存在了,则删除;
  118. if err = ziptil.Zip(this.BasePath,f); err == nil {
  119. //创建导出文件夹
  120. os.Mkdir(this.BasePath+"/"+output, os.ModePerm)
  121. if len(this.Config.Format) > 0 {
  122. var errs []string
  123. group := sync.WaitGroup{}
  124. for _, v := range this.Config.Format {
  125. fmt.Println("convert to " + v)
  126. switch strings.ToLower(v) {
  127. case "epub":
  128. group.Add(1)
  129. go func(group *sync.WaitGroup) {
  130. if err = this.convertToEpub(); err != nil {
  131. errs = append(errs, err.Error())
  132. fmt.Println("转换EPUB文档失败:" + err.Error())
  133. }
  134. group.Done()
  135. }(&group)
  136. case "mobi":
  137. group.Add(1)
  138. go func(group *sync.WaitGroup) {
  139. if err = this.convertToMobi(); err != nil {
  140. errs = append(errs, err.Error())
  141. fmt.Println("转换MOBI文档失败:" + err.Error())
  142. }
  143. group.Done()
  144. }(&group)
  145. case "pdf":
  146. group.Add(1)
  147. go func(group *sync.WaitGroup) {
  148. if err = this.convertToPdf(); err != nil {
  149. fmt.Println("转换PDF文档失败:" + err.Error())
  150. errs = append(errs, err.Error())
  151. }
  152. group.Done()
  153. }(&group)
  154. case "docx":
  155. group.Add(1)
  156. go func(group *sync.WaitGroup) {
  157. if err = this.convertToDocx(); err != nil {
  158. fmt.Println("转换WORD文档失败:" + err.Error())
  159. errs = append(errs, err.Error())
  160. }
  161. group.Done()
  162. }(&group)
  163. }
  164. }
  165. group.Wait()
  166. if len(errs) > 0 {
  167. err = errors.New(strings.Join(errs, "\n"))
  168. }
  169. } else {
  170. err = this.convertToPdf()
  171. if err != nil {
  172. fmt.Println(err)
  173. }
  174. }
  175. } else {
  176. fmt.Println("压缩目录出错" + err.Error())
  177. }
  178. return
  179. }
  180. //删除生成导出文档而创建的文件
  181. func (this *Converter) converterDefer() {
  182. //删除不必要的文件
  183. os.RemoveAll(filepath.Join(this.BasePath, "META-INF"))
  184. os.RemoveAll(filepath.Join(this.BasePath, "content.epub"))
  185. os.RemoveAll(filepath.Join(this.BasePath, "mimetype"))
  186. os.RemoveAll(filepath.Join(this.BasePath, "toc.ncx"))
  187. os.RemoveAll(filepath.Join(this.BasePath, "content.opf"))
  188. os.RemoveAll(filepath.Join(this.BasePath, "titlepage.xhtml")) //封面图片待优化
  189. os.RemoveAll(filepath.Join(this.BasePath, "summary.html")) //文档目录
  190. }
  191. //生成metainfo
  192. func (this *Converter) generateMetaInfo() (err error) {
  193. xml := `<?xml version="1.0"?>
  194. <container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
  195. <rootfiles>
  196. <rootfile full-path="content.opf" media-type="application/oebps-package+xml"/>
  197. </rootfiles>
  198. </container>
  199. `
  200. folder := filepath.Join(this.BasePath, "META-INF")
  201. os.MkdirAll(folder, os.ModePerm)
  202. err = ioutil.WriteFile(filepath.Join(folder, "container.xml"), []byte(xml), os.ModePerm)
  203. return
  204. }
  205. //形成mimetyppe
  206. func (this *Converter) generateMimeType() (err error) {
  207. return ioutil.WriteFile(filepath.Join(this.BasePath, "mimetype"), []byte("application/epub+zip"), os.ModePerm)
  208. }
  209. //生成封面
  210. func (this *Converter) generateTitlePage() (err error) {
  211. if ext := strings.ToLower(filepath.Ext(this.Config.Cover)); !(ext == ".html" || ext == ".xhtml") {
  212. xml := `<?xml version='1.0' encoding='` + this.Config.Charset + `'?>
  213. <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="` + this.Config.Language + `">
  214. <head>
  215. <meta http-equiv="Content-Type" content="text/html; charset=` + this.Config.Charset + `"/>
  216. <meta name="calibre:cover" content="true"/>
  217. <title>Cover</title>
  218. <style type="text/css" title="override_css">
  219. @page {padding: 0pt; margin:0pt}
  220. body { text-align: center; padding:0pt; margin: 0pt; }
  221. </style>
  222. </head>
  223. <body>
  224. <div>
  225. <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" width="100%" height="100%" viewBox="0 0 800 1068" preserveAspectRatio="none">
  226. <image width="800" height="1068" xlink:href="` + strings.TrimPrefix(this.Config.Cover, "./") + `"/>
  227. </svg>
  228. </div>
  229. </body>
  230. </html>
  231. `
  232. if err = ioutil.WriteFile(filepath.Join(this.BasePath, "titlepage.xhtml"), []byte(xml), os.ModePerm); err == nil {
  233. this.GeneratedCover = "titlepage.xhtml"
  234. }
  235. }
  236. return
  237. }
  238. //生成文档目录
  239. func (this *Converter) generateTocNcx() (err error) {
  240. ncx := `<?xml version='1.0' encoding='` + this.Config.Charset + `'?>
  241. <ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="%v">
  242. <head>
  243. <meta content="4" name="dtb:depth"/>
  244. <meta content="calibre (2.85.1)" name="dtb:generator"/>
  245. <meta content="0" name="dtb:totalPageCount"/>
  246. <meta content="0" name="dtb:maxPageNumber"/>
  247. </head>
  248. <docTitle>
  249. <text>%v</text>
  250. </docTitle>
  251. <navMap>%v</navMap>
  252. </ncx>
  253. `
  254. codes, _ := this.tocToXml(0, 1)
  255. ncx = fmt.Sprintf(ncx, this.Config.Language, this.Config.Title, strings.Join(codes, ""))
  256. return ioutil.WriteFile(filepath.Join(this.BasePath, "toc.ncx"), []byte(ncx), os.ModePerm)
  257. }
  258. //生成文档目录,即summary.html
  259. func (this *Converter) generateSummary() (err error) {
  260. //目录
  261. summary := `<!DOCTYPE html>
  262. <html lang="` + this.Config.Language + `">
  263. <head>
  264. <meta charset="` + this.Config.Charset + `">
  265. <title>目录</title>
  266. <style>
  267. body{margin: 0px;padding: 0px;}h1{text-align: center;padding: 0px;margin: 0px;}ul,li{list-style: none;}
  268. a{text-decoration: none;color: #4183c4;text-decoration: none;font-size: 16px;line-height: 28px;}
  269. </style>
  270. </head>
  271. <body>
  272. <h1>目&nbsp;&nbsp;&nbsp;&nbsp;录</h1>
  273. %v
  274. </body>
  275. </html>`
  276. summary = fmt.Sprintf(summary, strings.Join(this.tocToSummary(0), ""))
  277. return ioutil.WriteFile(filepath.Join(this.BasePath, "summary.html"), []byte(summary), os.ModePerm)
  278. }
  279. //将toc转成toc.ncx文件
  280. func (this *Converter) tocToXml(pid, idx int) (codes []string, next_idx int) {
  281. var code string
  282. for _, toc := range this.Config.Toc {
  283. if toc.Pid == pid {
  284. code, idx = this.getNavPoint(toc, idx)
  285. codes = append(codes, code)
  286. for _, item := range this.Config.Toc {
  287. if item.Pid == toc.Id {
  288. code, idx = this.getNavPoint(item, idx)
  289. codes = append(codes, code)
  290. var code_arr []string
  291. code_arr, idx = this.tocToXml(item.Id, idx)
  292. codes = append(codes, code_arr...)
  293. codes = append(codes, `</navPoint>`)
  294. }
  295. }
  296. codes = append(codes, `</navPoint>`)
  297. }
  298. }
  299. next_idx = idx
  300. return
  301. }
  302. //将toc转成toc.ncx文件
  303. func (this *Converter) tocToSummary(pid int) (summarys []string) {
  304. summarys = append(summarys, "<ul>")
  305. for _, toc := range this.Config.Toc {
  306. if toc.Pid == pid {
  307. summarys = append(summarys, fmt.Sprintf(`<li><a href="%v">%v</a></li>`, toc.Link, toc.Title))
  308. for _, item := range this.Config.Toc {
  309. if item.Pid == toc.Id {
  310. summarys = append(summarys, fmt.Sprintf(`<li><ul><li><a href="%v">%v</a></li>`, item.Link, item.Title))
  311. summarys = append(summarys, "<li>")
  312. summarys = append(summarys, this.tocToSummary(item.Id)...)
  313. summarys = append(summarys, "</li></ul></li>")
  314. }
  315. }
  316. }
  317. }
  318. summarys = append(summarys, "</ul>")
  319. return
  320. }
  321. //生成navPoint
  322. func (this *Converter) getNavPoint(toc Toc, idx int) (navpoint string, nextidx int) {
  323. navpoint = `
  324. <navPoint id="id%v" playOrder="%v">
  325. <navLabel>
  326. <text>%v</text>
  327. </navLabel>
  328. <content src="%v"/>`
  329. navpoint = fmt.Sprintf(navpoint, toc.Id, idx, toc.Title, toc.Link)
  330. this.Config.Order = append(this.Config.Order, toc.Link)
  331. nextidx = idx + 1
  332. return
  333. }
  334. //生成content.opf文件
  335. //倒数第二步调用
  336. func (this *Converter) generateContentOpf() (err error) {
  337. var (
  338. guide string
  339. manifest string
  340. manifestArr []string
  341. spine string //注意:如果存在封面,则需要把封面放在第一个位置
  342. spineArr []string
  343. )
  344. meta := `<dc:title>%v</dc:title>
  345. <dc:contributor opf:role="bkp">%v</dc:contributor>
  346. <dc:publisher>%v</dc:publisher>
  347. <dc:description>%v</dc:description>
  348. <dc:language>%v</dc:language>
  349. <dc:creator opf:file-as="Unknown" opf:role="aut">%v</dc:creator>
  350. <meta name="calibre:timestamp" content="%v"/>
  351. `
  352. meta = fmt.Sprintf(meta, this.Config.Title, this.Config.Contributor, this.Config.Publisher, this.Config.Description, this.Config.Language, this.Config.Creator, this.Config.Timestamp)
  353. if len(this.Config.Cover) > 0 {
  354. meta = meta + `<meta name="cover" content="cover"/>`
  355. guide = `<reference href="titlepage.xhtml" title="Cover" type="cover"/>`
  356. manifest = fmt.Sprintf(`<item href="%v" id="cover" media-type="%v"/>`, this.Config.Cover, GetMediaType(filepath.Ext(this.Config.Cover)))
  357. spineArr = append(spineArr, `<itemref idref="titlepage"/>`)
  358. }
  359. if _, err := os.Stat(this.BasePath + "/summary.html"); err == nil {
  360. spineArr = append(spineArr, `<itemref idref="summary"/>`) //目录
  361. }
  362. //扫描所有文件
  363. if files, err := filetil.ScanFiles(this.BasePath); err == nil {
  364. basePath := strings.Replace(this.BasePath, "\\", "/", -1)
  365. for _, file := range files {
  366. if !file.IsDir {
  367. ext := strings.ToLower(filepath.Ext(file.Path))
  368. sourcefile := strings.TrimPrefix(file.Path, basePath+"/")
  369. id := "ncx"
  370. if ext != ".ncx" {
  371. if file.Name == "titlepage.xhtml" { //封面
  372. id = "titlepage"
  373. } else if file.Name == "summary.html" { //目录
  374. id = "summary"
  375. } else {
  376. id = cryptil.Md5Crypt(sourcefile)
  377. }
  378. }
  379. if mt := GetMediaType(ext); mt != "" { //不是封面图片,且media-type不为空
  380. if sourcefile != strings.TrimLeft(this.Config.Cover, "./") { //不是封面图片,则追加进来。封面图片前面已经追加进来了
  381. manifestArr = append(manifestArr, fmt.Sprintf(`<item href="%v" id="%v" media-type="%v"/>`, sourcefile, id, mt))
  382. }
  383. }
  384. } else {
  385. fmt.Println(file.Path)
  386. }
  387. }
  388. items := make(map[string]string)
  389. for _, link := range this.Config.Order {
  390. id := cryptil.Md5Crypt(link)
  391. if _, ok := items[id]; !ok { //去重
  392. items[id] = id
  393. spineArr = append(spineArr, fmt.Sprintf(`<itemref idref="%v"/>`, id))
  394. }
  395. }
  396. manifest = manifest + strings.Join(manifestArr, "\n")
  397. spine = strings.Join(spineArr, "\n")
  398. } else {
  399. return err
  400. }
  401. pkg := `<?xml version='1.0' encoding='` + this.Config.Charset + `'?>
  402. <package xmlns="http://www.idpf.org/2007/opf" unique-identifier="uuid_id" version="2.0">
  403. <metadata xmlns:opf="http://www.idpf.org/2007/opf" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:calibre="http://calibre.kovidgoyal.net/2009/metadata">
  404. %v
  405. </metadata>
  406. <manifest>
  407. %v
  408. </manifest>
  409. <spine toc="ncx">
  410. %v
  411. </spine>
  412. %v
  413. </package>
  414. `
  415. if len(guide) > 0 {
  416. guide = `<guide>` + guide + `</guide>`
  417. }
  418. pkg = fmt.Sprintf(pkg, meta, manifest, spine, guide)
  419. return ioutil.WriteFile(filepath.Join(this.BasePath, "content.opf"), []byte(pkg), os.ModePerm)
  420. }
  421. //转成epub
  422. func (this *Converter) convertToEpub() (err error) {
  423. args := []string{
  424. filepath.Join(this.OutputPath, "content.epub"),
  425. filepath.Join(this.OutputPath, output, "book.epub"),
  426. }
  427. //cmd := exec.Command(ebookConvert, args...)
  428. //
  429. //if this.Debug {
  430. // fmt.Println(cmd.Args)
  431. //}
  432. //fmt.Println("正在转换EPUB文件", args[0])
  433. //return cmd.Run()
  434. return filetil.CopyFile(args[0],args[1])
  435. }
  436. //转成mobi
  437. func (this *Converter) convertToMobi() (err error) {
  438. args := []string{
  439. filepath.Join(this.OutputPath, "content.epub"),
  440. filepath.Join(this.OutputPath, output, "book.mobi"),
  441. }
  442. cmd := exec.Command(ebookConvert, args...)
  443. if this.Debug {
  444. fmt.Println(cmd.Args)
  445. }
  446. fmt.Println("正在转换 MOBI 文件", args[0])
  447. return cmd.Run()
  448. }
  449. //转成pdf
  450. func (this *Converter) convertToPdf() (err error) {
  451. args := []string{
  452. filepath.Join(this.OutputPath, "content.epub"),
  453. filepath.Join(this.OutputPath, output, "book.pdf"),
  454. }
  455. //页面大小
  456. if len(this.Config.PaperSize) > 0 {
  457. args = append(args, "--paper-size", this.Config.PaperSize)
  458. }
  459. //文字大小
  460. if len(this.Config.FontSize) > 0 {
  461. args = append(args, "--pdf-default-font-size", this.Config.FontSize)
  462. }
  463. //header template
  464. if len(this.Config.Header) > 0 {
  465. args = append(args, "--pdf-header-template", this.Config.Header)
  466. }
  467. //footer template
  468. if len(this.Config.Footer) > 0 {
  469. args = append(args, "--pdf-footer-template",this.Config.Footer)
  470. }
  471. if strings.Count(this.Config.MarginLeft,"") > 0 {
  472. args = append(args, "--pdf-page-margin-left", this.Config.MarginLeft)
  473. }
  474. if strings.Count(this.Config.MarginTop,"") > 0 {
  475. args = append(args, "--pdf-page-margin-top", this.Config.MarginTop)
  476. }
  477. if strings.Count(this.Config.MarginRight,"") > 0 {
  478. args = append(args, "--pdf-page-margin-right", this.Config.MarginRight)
  479. }
  480. if strings.Count(this.Config.MarginBottom,"") > 0 {
  481. args = append(args, "--pdf-page-margin-bottom", this.Config.MarginBottom)
  482. }
  483. //更多选项
  484. if len(this.Config.More) > 0 {
  485. args = append(args, this.Config.More...)
  486. }
  487. cmd := exec.Command(ebookConvert, args...)
  488. if this.Debug {
  489. fmt.Println(cmd.Args)
  490. }
  491. fmt.Println("正在转换 PDF 文件", args[0])
  492. return cmd.Run()
  493. }
  494. // 转成word
  495. func (this *Converter) convertToDocx() (err error) {
  496. args := []string{
  497. filepath.Join(this.OutputPath , "content.epub"),
  498. filepath.Join(this.OutputPath , output , "book.docx"),
  499. }
  500. args = append(args, "--docx-no-toc")
  501. //页面大小
  502. if len(this.Config.PaperSize) > 0 {
  503. args = append(args, "--docx-page-size", this.Config.PaperSize)
  504. }
  505. if len(this.Config.MarginLeft) > 0 {
  506. args = append(args, "--docx-page-margin-left", this.Config.MarginLeft)
  507. }
  508. if len(this.Config.MarginTop) > 0 {
  509. args = append(args, "--docx-page-margin-top", this.Config.MarginTop)
  510. }
  511. if len(this.Config.MarginRight) > 0 {
  512. args = append(args, "--docx-page-margin-right", this.Config.MarginRight)
  513. }
  514. if len(this.Config.MarginBottom) > 0 {
  515. args = append(args, "--docx-page-margin-bottom", this.Config.MarginBottom)
  516. }
  517. cmd := exec.Command(ebookConvert, args...)
  518. if this.Debug {
  519. fmt.Println(cmd.Args)
  520. }
  521. fmt.Println("正在转换 DOCX 文件", args[0])
  522. return cmd.Run()
  523. }