// converter.go
//
// Author: TruthHun
// Email: [email protected]
// Date: 2018-01-21

package converter
  import (
  	"errors"
  	"fmt"
  	"io/ioutil"
  	"os"
  	"os/exec"
  	"path/filepath"
  	"strings"
  	"time"

  	"github.com/lifei6671/mindoc/utils/cryptil"
  	"github.com/lifei6671/mindoc/utils/filetil"
  	"github.com/lifei6671/mindoc/utils/ziptil"
  )
  18. type Converter struct {
  19. BasePath string
  20. Config Config
  21. Debug bool
  22. GeneratedCover string
  23. }
  // Toc is a single table-of-contents entry.
  type Toc struct {
  	// Id identifies the entry; child entries point at it through Pid.
  	Id int `json:"id"`
  	// Link is the href of the document the entry opens.
  	Link string `json:"link"`
  	// Pid is the Id of the parent entry. The tree is walked starting at Pid 0.
  	Pid int `json:"pid"`
  	// Title is the text displayed for the entry.
  	Title string `json:"title"`
  }
  31. //config.json文件解析结构
  32. type Config struct {
  33. Charset string `json:"charset"` //字符编码,默认utf-8编码
  34. Cover string `json:"cover"` //封面图片,或者封面html文件
  35. Timestamp string `json:"date"` //时间日期,如“2018-01-01 12:12:21”,其实是time.Time格式,但是直接用string就好
  36. Description string `json:"description"` //摘要
  37. Footer string `json:"footer"` //pdf的footer
  38. Header string `json:"header"` //pdf的header
  39. Identifier string `json:"identifier"` //即uuid,留空即可
  40. Language string `json:"language"` //语言,如zh、en、zh-CN、en-US等
  41. Creator string `json:"creator"` //作者,即author
  42. Publisher string `json:"publisher"` //出版单位
  43. Contributor string `json:"contributor"` //同Publisher
  44. Title string `json:"title"` //文档标题
  45. Format []string `json:"format"` //导出格式,可选值:pdf、epub、mobi
  46. FontSize string `json:"font_size"` //默认的pdf导出字体大小
  47. PaperSize string `json:"paper_size"` //页面大小
  48. MarginLeft string `json:"margin_left"` //PDF文档左边距,写数字即可,默认72pt
  49. MarginRight string `json:"margin_right"` //PDF文档左边距,写数字即可,默认72pt
  50. MarginTop string `json:"margin_top"` //PDF文档左边距,写数字即可,默认72pt
  51. MarginBottom string `json:"margin_bottom"` //PDF文档左边距,写数字即可,默认72pt
  52. More []string `json:"more"` //更多导出选项[PDF导出选项,具体参考:https://manual.calibre-ebook.com/generated/en/ebook-convert.html#pdf-output-options]
  53. Toc []Toc `json:"toc"` //目录
  54. ///////////////////////////////////////////
  55. Order []string `json:"-"` //这个不需要赋值
  56. }
  var (
  	// output is the name of the export directory created below BasePath.
  	output = "output"
  	// ebookConvert is the name of calibre's command-line converter.
  	ebookConvert = "ebook-convert"
  )
  61. // 接口文档 https://manual.calibre-ebook.com/generated/en/ebook-convert.html#table-of-contents
  62. //根据json配置文件,创建文档转化对象
  63. func NewConverter(configFile string, debug ...bool) (converter *Converter, err error) {
  64. var (
  65. cfg Config
  66. basepath string
  67. db bool
  68. )
  69. if len(debug) > 0 {
  70. db = debug[0]
  71. }
  72. if cfg, err = parseConfig(configFile); err == nil {
  73. if basepath, err = filepath.Abs(filepath.Dir(configFile)); err == nil {
  74. //设置默认值
  75. if len(cfg.Timestamp) == 0 {
  76. cfg.Timestamp = time.Now().Format("2006-01-02 15:04:05")
  77. }
  78. if len(cfg.Charset) == 0 {
  79. cfg.Charset = "utf-8"
  80. }
  81. converter = &Converter{
  82. Config: cfg,
  83. BasePath: basepath,
  84. Debug: db,
  85. }
  86. }
  87. }
  88. return
  89. }
  90. //执行文档转换
  91. func (this *Converter) Convert() (err error) {
  92. if !this.Debug { //调试模式下不删除生成的文件
  93. defer this.converterDefer() //最后移除创建的多余而文件
  94. }
  95. if err = this.generateMimeType(); err != nil {
  96. return
  97. }
  98. if err = this.generateMetaInfo(); err != nil {
  99. return
  100. }
  101. if err = this.generateTocNcx(); err != nil { //生成目录
  102. return
  103. }
  104. if err = this.generateSummary(); err != nil { //生成文档内目录
  105. return
  106. }
  107. if err = this.generateTitlePage(); err != nil { //生成封面
  108. return
  109. }
  110. if err = this.generateContentOpf(); err != nil { //这个必须是generate*系列方法的最后一个调用
  111. return
  112. }
  113. //将当前文件夹下的所有文件压缩成zip包,然后直接改名成content.epub
  114. f := filepath.Join(this.BasePath, "content.epub")
  115. fmt.Println("epub目录 " + f)
  116. os.Remove(f) //如果原文件存在了,则删除;
  117. if err = ziptil.Zip(f, this.BasePath); err == nil {
  118. //创建导出文件夹
  119. os.Mkdir(this.BasePath+"/"+output, os.ModePerm)
  120. if len(this.Config.Format) > 0 {
  121. var errs []string
  122. for _, v := range this.Config.Format {
  123. fmt.Println("convert to " + v)
  124. switch strings.ToLower(v) {
  125. case "epub":
  126. if err = this.convertToEpub(); err != nil {
  127. errs = append(errs, err.Error())
  128. fmt.Println("转换EPUB文档失败:" + err.Error())
  129. }
  130. case "mobi":
  131. if err = this.convertToMobi(); err != nil {
  132. errs = append(errs, err.Error())
  133. fmt.Println("转换MOBI文档失败:" + err.Error())
  134. }
  135. case "pdf":
  136. if err = this.convertToPdf(); err != nil {
  137. fmt.Println("转换PDF文档失败:" + err.Error())
  138. errs = append(errs, err.Error())
  139. }
  140. case "docx":
  141. if err = this.convertToDocx(); err != nil {
  142. fmt.Println("转换WORD文档失败:" + err.Error())
  143. errs = append(errs, err.Error())
  144. }
  145. }
  146. }
  147. if len(errs) > 0 {
  148. err = errors.New(strings.Join(errs, "\n"))
  149. }
  150. } else {
  151. err = this.convertToPdf()
  152. if err != nil {
  153. fmt.Println(err)
  154. }
  155. }
  156. } else {
  157. fmt.Println("压缩目录出错" + err.Error())
  158. }
  159. return
  160. }
  161. //删除生成导出文档而创建的文件
  162. func (this *Converter) converterDefer() {
  163. //删除不必要的文件
  164. os.RemoveAll(filepath.Join(this.BasePath, "META-INF"))
  165. os.RemoveAll(filepath.Join(this.BasePath, "content.epub"))
  166. os.RemoveAll(filepath.Join(this.BasePath, "mimetype"))
  167. os.RemoveAll(filepath.Join(this.BasePath, "toc.ncx"))
  168. os.RemoveAll(filepath.Join(this.BasePath, "content.opf"))
  169. os.RemoveAll(filepath.Join(this.BasePath, "titlepage.xhtml")) //封面图片待优化
  170. os.RemoveAll(filepath.Join(this.BasePath, "summary.html")) //文档目录
  171. }
  172. //生成metainfo
  173. func (this *Converter) generateMetaInfo() (err error) {
  174. xml := `<?xml version="1.0"?>
  175. <container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
  176. <rootfiles>
  177. <rootfile full-path="content.opf" media-type="application/oebps-package+xml"/>
  178. </rootfiles>
  179. </container>
  180. `
  181. folder := filepath.Join(this.BasePath, "META-INF")
  182. os.MkdirAll(folder, os.ModePerm)
  183. err = ioutil.WriteFile(filepath.Join(folder, "container.xml"), []byte(xml), os.ModePerm)
  184. return
  185. }
  186. //形成mimetyppe
  187. func (this *Converter) generateMimeType() (err error) {
  188. return ioutil.WriteFile(filepath.Join(this.BasePath, "mimetype"), []byte("application/epub+zip"), os.ModePerm)
  189. }
  190. //生成封面
  191. func (this *Converter) generateTitlePage() (err error) {
  192. if ext := strings.ToLower(filepath.Ext(this.Config.Cover)); !(ext == ".html" || ext == ".xhtml") {
  193. xml := `<?xml version='1.0' encoding='` + this.Config.Charset + `'?>
  194. <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="` + this.Config.Language + `">
  195. <head>
  196. <meta http-equiv="Content-Type" content="text/html; charset=` + this.Config.Charset + `"/>
  197. <meta name="calibre:cover" content="true"/>
  198. <title>Cover</title>
  199. <style type="text/css" title="override_css">
  200. @page {padding: 0pt; margin:0pt}
  201. body { text-align: center; padding:0pt; margin: 0pt; }
  202. </style>
  203. </head>
  204. <body>
  205. <div>
  206. <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" width="100%" height="100%" viewBox="0 0 800 1068" preserveAspectRatio="none">
  207. <image width="800" height="1068" xlink:href="` + strings.TrimPrefix(this.Config.Cover, "./") + `"/>
  208. </svg>
  209. </div>
  210. </body>
  211. </html>
  212. `
  213. if err = ioutil.WriteFile(filepath.Join(this.BasePath, "titlepage.xhtml"), []byte(xml), os.ModePerm); err == nil {
  214. this.GeneratedCover = "titlepage.xhtml"
  215. }
  216. }
  217. return
  218. }
  219. //生成文档目录
  220. func (this *Converter) generateTocNcx() (err error) {
  221. ncx := `<?xml version='1.0' encoding='` + this.Config.Charset + `'?>
  222. <ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="%v">
  223. <head>
  224. <meta content="4" name="dtb:depth"/>
  225. <meta content="calibre (2.85.1)" name="dtb:generator"/>
  226. <meta content="0" name="dtb:totalPageCount"/>
  227. <meta content="0" name="dtb:maxPageNumber"/>
  228. </head>
  229. <docTitle>
  230. <text>%v</text>
  231. </docTitle>
  232. <navMap>%v</navMap>
  233. </ncx>
  234. `
  235. codes, _ := this.tocToXml(0, 1)
  236. ncx = fmt.Sprintf(ncx, this.Config.Language, this.Config.Title, strings.Join(codes, ""))
  237. return ioutil.WriteFile(filepath.Join(this.BasePath, "toc.ncx"), []byte(ncx), os.ModePerm)
  238. }
  239. //生成文档目录,即summary.html
  240. func (this *Converter) generateSummary() (err error) {
  241. //目录
  242. summary := `<!DOCTYPE html>
  243. <html lang="` + this.Config.Language + `">
  244. <head>
  245. <meta charset="` + this.Config.Charset + `">
  246. <title>目录</title>
  247. <style>
  248. body{margin: 0px;padding: 0px;}h1{text-align: center;padding: 0px;margin: 0px;}ul,li{list-style: none;}
  249. a{text-decoration: none;color: #4183c4;text-decoration: none;font-size: 16px;line-height: 28px;}
  250. </style>
  251. </head>
  252. <body>
  253. <h1>目&nbsp;&nbsp;&nbsp;&nbsp;录</h1>
  254. %v
  255. </body>
  256. </html>`
  257. summary = fmt.Sprintf(summary, strings.Join(this.tocToSummary(0), ""))
  258. return ioutil.WriteFile(filepath.Join(this.BasePath, "summary.html"), []byte(summary), os.ModePerm)
  259. }
  260. //将toc转成toc.ncx文件
  261. func (this *Converter) tocToXml(pid, idx int) (codes []string, next_idx int) {
  262. var code string
  263. for _, toc := range this.Config.Toc {
  264. if toc.Pid == pid {
  265. code, idx = this.getNavPoint(toc, idx)
  266. codes = append(codes, code)
  267. for _, item := range this.Config.Toc {
  268. if item.Pid == toc.Id {
  269. code, idx = this.getNavPoint(item, idx)
  270. codes = append(codes, code)
  271. var code_arr []string
  272. code_arr, idx = this.tocToXml(item.Id, idx)
  273. codes = append(codes, code_arr...)
  274. codes = append(codes, `</navPoint>`)
  275. }
  276. }
  277. codes = append(codes, `</navPoint>`)
  278. }
  279. }
  280. next_idx = idx
  281. return
  282. }
  283. //将toc转成toc.ncx文件
  284. func (this *Converter) tocToSummary(pid int) (summarys []string) {
  285. summarys = append(summarys, "<ul>")
  286. for _, toc := range this.Config.Toc {
  287. if toc.Pid == pid {
  288. summarys = append(summarys, fmt.Sprintf(`<li><a href="%v">%v</a></li>`, toc.Link, toc.Title))
  289. for _, item := range this.Config.Toc {
  290. if item.Pid == toc.Id {
  291. summarys = append(summarys, fmt.Sprintf(`<li><ul><li><a href="%v">%v</a></li>`, item.Link, item.Title))
  292. summarys = append(summarys, "<li>")
  293. summarys = append(summarys, this.tocToSummary(item.Id)...)
  294. summarys = append(summarys, "</li></ul></li>")
  295. }
  296. }
  297. }
  298. }
  299. summarys = append(summarys, "</ul>")
  300. return
  301. }
  302. //生成navPoint
  303. func (this *Converter) getNavPoint(toc Toc, idx int) (navpoint string, nextidx int) {
  304. navpoint = `
  305. <navPoint id="id%v" playOrder="%v">
  306. <navLabel>
  307. <text>%v</text>
  308. </navLabel>
  309. <content src="%v"/>`
  310. navpoint = fmt.Sprintf(navpoint, toc.Id, idx, toc.Title, toc.Link)
  311. this.Config.Order = append(this.Config.Order, toc.Link)
  312. nextidx = idx + 1
  313. return
  314. }
  315. //生成content.opf文件
  316. //倒数第二步调用
  317. func (this *Converter) generateContentOpf() (err error) {
  318. var (
  319. guide string
  320. manifest string
  321. manifestArr []string
  322. spine string //注意:如果存在封面,则需要把封面放在第一个位置
  323. spineArr []string
  324. )
  325. meta := `<dc:title>%v</dc:title>
  326. <dc:contributor opf:role="bkp">%v</dc:contributor>
  327. <dc:publisher>%v</dc:publisher>
  328. <dc:description>%v</dc:description>
  329. <dc:language>%v</dc:language>
  330. <dc:creator opf:file-as="Unknown" opf:role="aut">%v</dc:creator>
  331. <meta name="calibre:timestamp" content="%v"/>
  332. `
  333. meta = fmt.Sprintf(meta, this.Config.Title, this.Config.Contributor, this.Config.Publisher, this.Config.Description, this.Config.Language, this.Config.Creator, this.Config.Timestamp)
  334. if len(this.Config.Cover) > 0 {
  335. meta = meta + `<meta name="cover" content="cover"/>`
  336. guide = `<reference href="titlepage.xhtml" title="Cover" type="cover"/>`
  337. manifest = fmt.Sprintf(`<item href="%v" id="cover" media-type="%v"/>`, this.Config.Cover, GetMediaType(filepath.Ext(this.Config.Cover)))
  338. spineArr = append(spineArr, `<itemref idref="titlepage"/>`)
  339. }
  340. if _, err := os.Stat(this.BasePath + "/summary.html"); err == nil {
  341. spineArr = append(spineArr, `<itemref idref="summary"/>`) //目录
  342. }
  343. //扫描所有文件
  344. if files, err := filetil.ScanFiles(this.BasePath); err == nil {
  345. basePath := strings.Replace(this.BasePath, "\\", "/", -1)
  346. for _, file := range files {
  347. if !file.IsDir {
  348. ext := strings.ToLower(filepath.Ext(file.Path))
  349. sourcefile := strings.TrimPrefix(file.Path, basePath+"/")
  350. id := "ncx"
  351. if ext != ".ncx" {
  352. if file.Name == "titlepage.xhtml" { //封面
  353. id = "titlepage"
  354. } else if file.Name == "summary.html" { //目录
  355. id = "summary"
  356. } else {
  357. id = cryptil.Md5Crypt(sourcefile)
  358. }
  359. }
  360. if mt := GetMediaType(ext); mt != "" { //不是封面图片,且media-type不为空
  361. if sourcefile != strings.TrimLeft(this.Config.Cover, "./") { //不是封面图片,则追加进来。封面图片前面已经追加进来了
  362. manifestArr = append(manifestArr, fmt.Sprintf(`<item href="%v" id="%v" media-type="%v"/>`, sourcefile, id, mt))
  363. }
  364. }
  365. } else {
  366. fmt.Println(file.Path)
  367. }
  368. }
  369. items := make(map[string]string)
  370. for _, link := range this.Config.Order {
  371. id := cryptil.Md5Crypt(link)
  372. if _, ok := items[id]; !ok { //去重
  373. items[id] = id
  374. spineArr = append(spineArr, fmt.Sprintf(`<itemref idref="%v"/>`, id))
  375. }
  376. }
  377. manifest = manifest + strings.Join(manifestArr, "\n")
  378. spine = strings.Join(spineArr, "\n")
  379. } else {
  380. return err
  381. }
  382. pkg := `<?xml version='1.0' encoding='` + this.Config.Charset + `'?>
  383. <package xmlns="http://www.idpf.org/2007/opf" unique-identifier="uuid_id" version="2.0">
  384. <metadata xmlns:opf="http://www.idpf.org/2007/opf" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:calibre="http://calibre.kovidgoyal.net/2009/metadata">
  385. %v
  386. </metadata>
  387. <manifest>
  388. %v
  389. </manifest>
  390. <spine toc="ncx">
  391. %v
  392. </spine>
  393. %v
  394. </package>
  395. `
  396. if len(guide) > 0 {
  397. guide = `<guide>` + guide + `</guide>`
  398. }
  399. pkg = fmt.Sprintf(pkg, meta, manifest, spine, guide)
  400. return ioutil.WriteFile(filepath.Join(this.BasePath, "content.opf"), []byte(pkg), os.ModePerm)
  401. }
  402. //转成epub
  403. func (this *Converter) convertToEpub() (err error) {
  404. args := []string{
  405. filepath.Join(this.BasePath, "content.epub"),
  406. filepath.Join(this.BasePath, output, "book.epub"),
  407. }
  408. cmd := exec.Command(ebookConvert, args...)
  409. if this.Debug {
  410. fmt.Println(cmd.Args)
  411. }
  412. return cmd.Run()
  413. }
  414. //转成mobi
  415. func (this *Converter) convertToMobi() (err error) {
  416. args := []string{
  417. filepath.Join(this.BasePath, "content.epub"),
  418. filepath.Join(this.BasePath, output, "book.mobi"),
  419. }
  420. cmd := exec.Command(ebookConvert, args...)
  421. if this.Debug {
  422. fmt.Println(cmd.Args)
  423. }
  424. return cmd.Run()
  425. }
  426. //转成pdf
  427. func (this *Converter) convertToPdf() (err error) {
  428. args := []string{
  429. filepath.Join(this.BasePath, "content.epub"),
  430. filepath.Join(this.BasePath, output, "book.pdf"),
  431. }
  432. //页面大小
  433. if len(this.Config.PaperSize) > 0 {
  434. args = append(args, "--paper-size", this.Config.PaperSize)
  435. }
  436. //文字大小
  437. if len(this.Config.FontSize) > 0 {
  438. args = append(args, "--pdf-default-font-size", this.Config.FontSize)
  439. }
  440. //header template
  441. if len(this.Config.Header) > 0 {
  442. args = append(args, "--pdf-header-template", this.Config.Header)
  443. }
  444. //footer template
  445. if len(this.Config.Footer) > 0 {
  446. args = append(args, "--pdf-footer-template", this.Config.Footer)
  447. }
  448. if len(this.Config.MarginLeft) > 0 {
  449. args = append(args, "--pdf-page-margin-left", this.Config.MarginLeft)
  450. }
  451. if len(this.Config.MarginTop) > 0 {
  452. args = append(args, "--pdf-page-margin-top", this.Config.MarginTop)
  453. }
  454. if len(this.Config.MarginRight) > 0 {
  455. args = append(args, "--pdf-page-margin-right", this.Config.MarginRight)
  456. }
  457. if len(this.Config.MarginBottom) > 0 {
  458. args = append(args, "--pdf-page-margin-bottom", this.Config.MarginBottom)
  459. }
  460. //更多选项
  461. if len(this.Config.More) > 0 {
  462. args = append(args, this.Config.More...)
  463. }
  464. cmd := exec.Command(ebookConvert, args...)
  465. if this.Debug {
  466. fmt.Println(cmd.Args)
  467. }
  468. return cmd.Run()
  469. }
  470. // 转成word
  471. func (this *Converter) convertToDocx() (err error) {
  472. args := []string{
  473. this.BasePath + "/content.epub",
  474. this.BasePath + "/" + output + "/book.docx",
  475. }
  476. args = append(args, "--docx-no-toc")
  477. //页面大小
  478. if len(this.Config.PaperSize) > 0 {
  479. args = append(args, "--docx-page-size", this.Config.PaperSize)
  480. }
  481. if len(this.Config.MarginLeft) > 0 {
  482. args = append(args, "--docx-page-margin-left", this.Config.MarginLeft)
  483. }
  484. if len(this.Config.MarginTop) > 0 {
  485. args = append(args, "--docx-page-margin-top", this.Config.MarginTop)
  486. }
  487. if len(this.Config.MarginRight) > 0 {
  488. args = append(args, "--docx-page-margin-right", this.Config.MarginRight)
  489. }
  490. if len(this.Config.MarginBottom) > 0 {
  491. args = append(args, "--docx-page-margin-bottom", this.Config.MarginBottom)
  492. }
  493. cmd := exec.Command(ebookConvert, args...)
  494. if this.Debug {
  495. fmt.Println(cmd.Args)
  496. }
  497. return cmd.Run()
  498. }