Pārlūkot izejas kodu

add support for importing docx file

Augists 3 gadi atpakaļ
vecāks
revīzija
a877083e96
4 mainītis faili ar 606 papildinājumiem un 6 dzēšanām
  1. 8 4
      controllers/BookController.go
  2. 46 1
      models/BookModel.go
  3. 551 0
      utils/docx2md.go
  4. 1 1
      views/book/index.tpl

+ 8 - 4
controllers/BookController.go

@@ -340,7 +340,7 @@ func (c *BookController) UploadCover() {
 	fileName := "cover_" + strconv.FormatInt(time.Now().UnixNano(), 16)
 
 	//附件路径按照项目组织
-// 	filePath := filepath.Join("uploads", book.Identify, "images", fileName+ext)
+	// 	filePath := filepath.Join("uploads", book.Identify, "images", fileName+ext)
 	filePath := filepath.Join(conf.WorkingDirectory, "uploads", book.Identify, "images", fileName+ext)
 
 	path := filepath.Dir(filePath)
@@ -571,7 +571,7 @@ func (c *BookController) Copy() {
 	}
 }
 
-//导入zip压缩包
+// 导入zip压缩包或docx
 func (c *BookController) Import() {
 
 	file, moreFile, err := c.GetFile("import-file")
@@ -608,7 +608,7 @@ func (c *BookController) Import() {
 
 	ext := filepath.Ext(moreFile.Filename)
 
-	if !strings.EqualFold(ext, ".zip") {
+	if !strings.EqualFold(ext, ".zip") && !strings.EqualFold(ext, ".docx") {
 		c.JsonResult(6004, "不支持的文件类型")
 	}
 
@@ -643,7 +643,11 @@ func (c *BookController) Import() {
 	book.Editor = "markdown"
 	book.Theme = "default"
 
-	go book.ImportBook(tempPath, c.Lang)
+	if strings.EqualFold(ext, ".zip") {
+		go book.ImportBook(tempPath, c.Lang)
+	} else if strings.EqualFold(ext, ".docx") {
+		go book.ImportWordBook(tempPath, c.Lang)
+	}
 
 	logs.Info("用户[", c.Member.Account, "]导入了项目 ->", book)
 

+ 46 - 1
models/BookModel.go

@@ -680,7 +680,7 @@ func (book *Book) ResetDocumentNumber(bookId int) {
 	}
 }
 
-//导入项目
+// 导入zip项目
 func (book *Book) ImportBook(zipPath string, lang string) error {
 	if !filetil.FileExists(zipPath) {
 		return errors.New("文件不存在 => " + zipPath)
@@ -978,6 +978,51 @@ func (book *Book) ImportBook(zipPath string, lang string) error {
 	return err
 }
 
+// 导入docx项目
+func (book *Book) ImportWordBook(docxPath string, lang string) error {
+	if !filetil.FileExists(docxPath) {
+		return errors.New("文件不存在")
+	}
+  docxPath = strings.Replace(docxPath, "\\", "/", -1)
+
+	o := orm.NewOrm()
+
+	o.Insert(book)
+	relationship := NewRelationship()
+	relationship.BookId = book.BookId
+	relationship.RoldId = 0
+	relationship.MemberId = book.MemberId
+	relationship.Insert()
+
+  doc := NewDocument()
+  doc.BookId = book.BookId
+  doc.MemberId = book.MemberId
+  docIdentify := strings.Replace(strings.TrimPrefix(docxPath, os.TempDir()+"/"), "/", "-", -1)
+
+  if ok, err := regexp.MatchString(`[a-z]+[a-zA-Z0-9_.\-]*$`, docIdentify); !ok || err != nil {
+    docIdentify = "import-" + docIdentify
+  }
+
+  doc.Identify = docIdentify
+
+	if doc.Markdown, err := util.Docx2md(docxPath, false); err != nil {
+    logs.Error("导入doc项目转换异常 => ", err)
+  }
+
+  doc.Content = string(blackfriday.Run([]byte(doc.Markdown)))
+
+  doc.Version = time.Now().Unix()
+
+  for _, line := range strings.Split(doc.Markdown, "\n") {
+    if strings.HasPrefix(line, "#") {
+      docName := strings.TrimLeft(line, "#")
+      break
+    }
+  }
+
+  doc.DocumentName = strings.TrimSpace(docName)
+}
+
 func (book *Book) FindForRoleId(bookId, memberId int) (conf.BookRole, error) {
 	o := orm.NewOrm()
 

+ 551 - 0
utils/docx2md.go

@@ -0,0 +1,551 @@
+// https://github.com/mattn/docx2md
+// License MIT
+package util
+
+import (
+	"archive/zip"
+	"bytes"
+	"encoding/base64"
+	"encoding/xml"
+	"errors"
+	"flag"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"log"
+	"os"
+	"path"
+	"path/filepath"
+	"runtime"
+	"strconv"
+	"strings"
+
+	"github.com/mattn/go-runewidth"
+)
+
+// Relationship is one <Relationship> element of a relationships part.
+// Its fields are filled from the element's attributes of the same names.
+type Relationship struct {
+	Text       string `xml:",chardata"`
+	ID         string `xml:"Id,attr"`     // matched against hyperlink "id" and blip "embed" attributes in walk
+	Type       string `xml:"Type,attr"`
+	Target     string `xml:"Target,attr"` // link destination, or image path below "word/" in the archive
+	TargetMode string `xml:"TargetMode,attr"`
+}
+
+// Relationships is the root <Relationships> element; Docx2md decodes
+// word/_rels/document.xml.rels into it.
+type Relationships struct {
+	XMLName      xml.Name       `xml:"Relationships"`
+	Text         string         `xml:",chardata"`
+	Xmlns        string         `xml:"xmlns,attr"`
+	Relationship []Relationship `xml:"Relationship"` // one entry per <Relationship> child
+}
+
+// TextVal is an element whose payload is carried in a "val" attribute.
+// It is reused for the simple child elements of the numbering definitions.
+type TextVal struct {
+	Text string `xml:",chardata"`
+	Val  string `xml:"val,attr"`
+}
+
+// NumberingLvl is one <lvl> element of an abstract numbering definition.
+// walk reads Ilvl, Start, NumFmt and PPr.Ind.Left from it to render list items.
+type NumberingLvl struct {
+	Text      string  `xml:",chardata"`
+	Ilvl      string  `xml:"ilvl,attr"` // level index, compared with the paragraph's ilvl value
+	Tplc      string  `xml:"tplc,attr"`
+	Tentative string  `xml:"tentative,attr"`
+	Start     TextVal `xml:"start"`  // first number of the list, parsed as an integer by walk
+	NumFmt    TextVal `xml:"numFmt"` // walk handles "decimal", "aiueoFullWidth" and "bullet"
+	LvlText   TextVal `xml:"lvlText"`
+	LvlJc     TextVal `xml:"lvlJc"`
+	PPr       struct {
+		Text string `xml:",chardata"`
+		Ind  struct {
+			Text    string `xml:",chardata"`
+			Left    string `xml:"left,attr"` // walk divides this by 360 to get the indent depth
+			Hanging string `xml:"hanging,attr"`
+		} `xml:"ind"`
+	} `xml:"pPr"`
+	RPr struct {
+		Text string `xml:",chardata"`
+		U    struct {
+			Text string `xml:",chardata"`
+			Val  string `xml:"val,attr"`
+		} `xml:"u"`
+		RFonts struct {
+			Text string `xml:",chardata"`
+			Hint string `xml:"hint,attr"`
+		} `xml:"rFonts"`
+	} `xml:"rPr"`
+}
+
+// Numbering is the root <numbering> element; Docx2md decodes
+// word/numbering.xml into it. walk resolves a paragraph's numId through Num
+// to an AbstractNum entry and then to the matching level.
+type Numbering struct {
+	XMLName     xml.Name `xml:"numbering"`
+	Text        string   `xml:",chardata"`
+	// The string fields below capture root attributes by local name;
+	// presumably these are the xmlns:* namespace declarations - TODO confirm.
+	Wpc         string   `xml:"wpc,attr"`
+	Cx          string   `xml:"cx,attr"`
+	Cx1         string   `xml:"cx1,attr"`
+	Mc          string   `xml:"mc,attr"`
+	O           string   `xml:"o,attr"`
+	R           string   `xml:"r,attr"`
+	M           string   `xml:"m,attr"`
+	V           string   `xml:"v,attr"`
+	Wp14        string   `xml:"wp14,attr"`
+	Wp          string   `xml:"wp,attr"`
+	W10         string   `xml:"w10,attr"`
+	W           string   `xml:"w,attr"`
+	W14         string   `xml:"w14,attr"`
+	W15         string   `xml:"w15,attr"`
+	W16se       string   `xml:"w16se,attr"`
+	Wpg         string   `xml:"wpg,attr"`
+	Wpi         string   `xml:"wpi,attr"`
+	Wne         string   `xml:"wne,attr"`
+	Wps         string   `xml:"wps,attr"`
+	Ignorable   string   `xml:"Ignorable,attr"`
+	// AbstractNum holds the numbering definitions with their per-level settings.
+	AbstractNum []struct {
+		Text                       string         `xml:",chardata"`
+		AbstractNumID              string         `xml:"abstractNumId,attr"`
+		RestartNumberingAfterBreak string         `xml:"restartNumberingAfterBreak,attr"`
+		Nsid                       TextVal        `xml:"nsid"`
+		MultiLevelType             TextVal        `xml:"multiLevelType"`
+		Tmpl                       TextVal        `xml:"tmpl"`
+		Lvl                        []NumberingLvl `xml:"lvl"`
+	} `xml:"abstractNum"`
+	// Num maps a numId used by paragraphs to an abstractNumId.
+	Num []struct {
+		Text          string  `xml:",chardata"`
+		NumID         string  `xml:"numId,attr"`
+		AbstractNumID TextVal `xml:"abstractNumId"`
+	} `xml:"num"`
+}
+
+// file holds the state of one conversion: the opened archive, the decoded
+// relationship and numbering parts, and the running list counters.
+type file struct {
+	rels  Relationships   // decoded word/_rels/document.xml.rels
+	num   Numbering       // decoded word/numbering.xml
+	r     *zip.ReadCloser // the opened .docx archive
+	embed bool            // inline images as data URIs instead of writing them to disk
+	list  map[string]int  // current number per list, keyed by "numID:indent"
+}
+
+// Node is a generic XML element tree used to hold the document part.
+type Node struct {
+	XMLName xml.Name
+	Attrs   []xml.Attr `xml:"-"`         // skipped by the decoder; set by UnmarshalXML from the start element
+	Content []byte     `xml:",innerxml"` // raw inner XML of the element
+	Nodes   []Node     `xml:",any"`      // child elements in document order
+}
+
+// UnmarshalXML implements xml.Unmarshaler. It keeps the attributes of the
+// start element in n.Attrs and then decodes the rest of the element into n.
+func (n *Node) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
+	// plain has Node's fields but not its methods, so decoding through it
+	// does not call UnmarshalXML again for this element.
+	type plain Node
+
+	n.Attrs = start.Attr
+	target := (*plain)(n)
+	return d.DecodeElement(target, &start)
+}
+
+// escape returns s with a backslash inserted in front of every occurrence of
+// each character listed in set.
+func escape(s, set string) string {
+	pairs := make([]string, 0, 2*len(set))
+	for _, ch := range set {
+		plain := string(ch)
+		pairs = append(pairs, plain, `\`+plain)
+	}
+	return strings.NewReplacer(pairs...).Replace(s)
+}
+
+func (zf *file) extract(rel *Relationship, w io.Writer) error {
+	err := os.MkdirAll(filepath.Dir(rel.Target), 0755)
+	if err != nil {
+		return err
+	}
+	for _, f := range zf.r.File {
+		if f.Name != "word/"+rel.Target {
+			continue
+		}
+		rc, err := f.Open()
+		if err != nil {
+			return err
+		}
+		defer rc.Close()
+
+		b := make([]byte, f.UncompressedSize64)
+		n, err := rc.Read(b)
+		if err != nil && err != io.EOF {
+			return err
+		}
+		if zf.embed {
+			fmt.Fprintf(w, "![](data:image/png;base64,%s)",
+				base64.StdEncoding.EncodeToString(b[:n]))
+		} else {
+			err = ioutil.WriteFile(rel.Target, b, 0644)
+			if err != nil {
+				return err
+			}
+			fmt.Fprintf(w, "![](%s)", escape(rel.Target, "()"))
+		}
+		break
+	}
+	return nil
+}
+
+// attr returns the value of the first attribute in attrs whose local name is
+// name. The boolean result is false when there is no such attribute.
+func attr(attrs []xml.Attr, name string) (string, bool) {
+	for i := range attrs {
+		if a := attrs[i]; a.Name.Local == name {
+			return a.Value, true
+		}
+	}
+	return "", false
+}
+
+// walk renders node and its descendants as Markdown to w, choosing the
+// rendering by the element's local name. It returns the first error reported
+// by a nested walk or by extract; errors from writing to w are not checked.
+func (zf *file) walk(node *Node, w io.Writer) error {
+	switch node.XMLName.Local {
+	case "hyperlink":
+		// Rendered as [text](target); the text is collected from the children first.
+		fmt.Fprint(w, "[")
+		var cbuf bytes.Buffer
+		for _, n := range node.Nodes {
+			if err := zf.walk(&n, &cbuf); err != nil {
+				return err
+			}
+		}
+		fmt.Fprint(w, escape(cbuf.String(), "[]"))
+		fmt.Fprint(w, "]")
+
+		// The target is looked up by relationship ID; it stays empty when no relationship matches.
+		fmt.Fprint(w, "(")
+		if id, ok := attr(node.Attrs, "id"); ok {
+			for _, rel := range zf.rels.Relationship {
+				if id == rel.ID {
+					fmt.Fprint(w, escape(rel.Target, "()"))
+					break
+				}
+			}
+		}
+		fmt.Fprint(w, ")")
+	case "t":
+		// Content is the element's raw inner XML and is written unchanged.
+		fmt.Fprint(w, string(node.Content))
+	case "pPr":
+		// Paragraph properties: write indentation, heading and list prefixes.
+		code := false
+		for _, n := range node.Nodes {
+			switch n.XMLName.Local {
+			case "ind":
+				// Two spaces for every 360 units of the "left" attribute.
+				if left, ok := attr(n.Attrs, "left"); ok {
+					if i, err := strconv.Atoi(left); err == nil && i > 0 {
+						fmt.Fprint(w, strings.Repeat("  ", i/360))
+					}
+				}
+			case "pStyle":
+				if val, ok := attr(n.Attrs, "val"); ok {
+					if strings.HasPrefix(val, "Heading") {
+						// "HeadingN" becomes N '#' characters.
+						if i, err := strconv.Atoi(val[7:]); err == nil && i > 0 {
+							fmt.Fprint(w, strings.Repeat("#", i)+" ")
+						}
+					} else if val == "Code" {
+						code = true
+					} else {
+						// A purely numeric style name is treated as a heading level as well.
+						if i, err := strconv.Atoi(val); err == nil && i > 0 {
+							fmt.Fprint(w, strings.Repeat("#", i)+" ")
+						}
+					}
+				}
+			case "numPr":
+				// List item: read numId and ilvl, then look up the level's settings.
+				numID := ""
+				ilvl := ""
+				numFmt := ""
+				start := 1
+				ind := 0
+				for _, nn := range n.Nodes {
+					if nn.XMLName.Local == "numId" {
+						if val, ok := attr(nn.Attrs, "val"); ok {
+							numID = val
+						}
+					}
+					if nn.XMLName.Local == "ilvl" {
+						if val, ok := attr(nn.Attrs, "val"); ok {
+							ilvl = val
+						}
+					}
+				}
+				// numId -> abstractNumId -> level; each loop stops after its first match.
+				for _, num := range zf.num.Num {
+					if numID != num.NumID {
+						continue
+					}
+					for _, abnum := range zf.num.AbstractNum {
+						if abnum.AbstractNumID != num.AbstractNumID.Val {
+							continue
+						}
+						for _, ablvl := range abnum.Lvl {
+							if ablvl.Ilvl != ilvl {
+								continue
+							}
+							if i, err := strconv.Atoi(ablvl.Start.Val); err == nil {
+								start = i
+							}
+							if i, err := strconv.Atoi(ablvl.PPr.Ind.Left); err == nil {
+								ind = i / 360
+							}
+							numFmt = ablvl.NumFmt.Val
+							break
+						}
+						break
+					}
+					break
+				}
+
+				fmt.Fprint(w, strings.Repeat("  ", ind))
+				switch numFmt {
+				case "decimal", "aiueoFullWidth":
+					// Counters are kept per list and indent: the first item uses start, later items add one.
+					key := fmt.Sprintf("%s:%d", numID, ind)
+					cur, ok := zf.list[key]
+					if !ok {
+						zf.list[key] = start
+					} else {
+						zf.list[key] = cur + 1
+					}
+					fmt.Fprintf(w, "%d. ", zf.list[key])
+				case "bullet":
+					fmt.Fprint(w, "* ")
+				}
+			}
+		}
+		// The children of pPr itself are walked here, between backticks for the "Code" style.
+		if code {
+			fmt.Fprint(w, "`")
+		}
+		for _, n := range node.Nodes {
+			if err := zf.walk(&n, w); err != nil {
+				return err
+			}
+		}
+		if code {
+			fmt.Fprint(w, "`")
+		}
+	case "tbl":
+		// Collect the text of every cell, with newlines removed, row by row.
+		var rows [][]string
+		for _, tr := range node.Nodes {
+			if tr.XMLName.Local != "tr" {
+				continue
+			}
+			var cols []string
+			for _, tc := range tr.Nodes {
+				if tc.XMLName.Local != "tc" {
+					continue
+				}
+				var cbuf bytes.Buffer
+				if err := zf.walk(&tc, &cbuf); err != nil {
+					return err
+				}
+				cols = append(cols, strings.Replace(cbuf.String(), "\n", "", -1))
+			}
+			rows = append(rows, cols)
+		}
+		maxcol := 0
+		for _, cols := range rows {
+			if len(cols) > maxcol {
+				maxcol = len(cols)
+			}
+		}
+		// Column width is the widest cell, measured with runewidth.StringWidth.
+		widths := make([]int, maxcol)
+		for _, row := range rows {
+			for i := 0; i < maxcol; i++ {
+				if i < len(row) {
+					width := runewidth.StringWidth(row[i])
+					if widths[i] < width {
+						widths[i] = width
+					}
+				}
+			}
+		}
+		for i, row := range rows {
+			if i == 0 {
+				// An empty header row and a separator row are written before the first data row.
+				for j := 0; j < maxcol; j++ {
+					fmt.Fprint(w, "|")
+					fmt.Fprint(w, strings.Repeat(" ", widths[j]))
+				}
+				fmt.Fprint(w, "|\n")
+				for j := 0; j < maxcol; j++ {
+					fmt.Fprint(w, "|")
+					fmt.Fprint(w, strings.Repeat("-", widths[j]))
+				}
+				fmt.Fprint(w, "|\n")
+			}
+			for j := 0; j < maxcol; j++ {
+				fmt.Fprint(w, "|")
+				if j < len(row) {
+					// Padding uses the unescaped width; escaping '|' may make the cell longer.
+					width := runewidth.StringWidth(row[j])
+					fmt.Fprint(w, escape(row[j], "|"))
+					fmt.Fprint(w, strings.Repeat(" ", widths[j]-width))
+				} else {
+					fmt.Fprint(w, strings.Repeat(" ", widths[j]))
+				}
+			}
+			fmt.Fprint(w, "|\n")
+		}
+		fmt.Fprint(w, "\n")
+	case "r":
+		// Run: read bold/italic/strike from rPr, then wrap the text in the matching markers.
+		bold := false
+		italic := false
+		strike := false
+		for _, n := range node.Nodes {
+			if n.XMLName.Local != "rPr" {
+				continue
+			}
+			for _, nn := range n.Nodes {
+				switch nn.XMLName.Local {
+				case "b":
+					bold = true
+				case "i":
+					italic = true
+				case "strike":
+					strike = true
+				}
+			}
+		}
+		if strike {
+			fmt.Fprint(w, "~~")
+		}
+		if bold {
+			fmt.Fprint(w, "**")
+		}
+		if italic {
+			fmt.Fprint(w, "*")
+		}
+		var cbuf bytes.Buffer
+		for _, n := range node.Nodes {
+			if err := zf.walk(&n, &cbuf); err != nil {
+				return err
+			}
+		}
+		// Escape marker characters in the run's own output so they are not read as formatting.
+		fmt.Fprint(w, escape(cbuf.String(), `*~\`))
+		// Markers are closed in the reverse order of opening.
+		if italic {
+			fmt.Fprint(w, "*")
+		}
+		if bold {
+			fmt.Fprint(w, "**")
+		}
+		if strike {
+			fmt.Fprint(w, "~~")
+		}
+	case "p":
+		// Paragraph: children followed by a newline.
+		for _, n := range node.Nodes {
+			if err := zf.walk(&n, w); err != nil {
+				return err
+			}
+		}
+		fmt.Fprintln(w)
+	case "blip":
+		// Image reference: extract every relationship whose ID equals the "embed" attribute.
+		if id, ok := attr(node.Attrs, "embed"); ok {
+			for _, rel := range zf.rels.Relationship {
+				if id != rel.ID {
+					continue
+				}
+				if err := zf.extract(&rel, w); err != nil {
+					return err
+				}
+			}
+		}
+	case "Fallback":
+		// Empty case: nothing is written and the children are not walked.
+	case "txbxContent":
+		// The content is written inside a fenced code block.
+		var cbuf bytes.Buffer
+		for _, n := range node.Nodes {
+			if err := zf.walk(&n, &cbuf); err != nil {
+				return err
+			}
+		}
+		fmt.Fprintln(w, "\n```\n"+cbuf.String()+"```")
+	default:
+		// Any other element only passes through to its children.
+		for _, n := range node.Nodes {
+			if err := zf.walk(&n, w); err != nil {
+				return err
+			}
+		}
+	}
+
+	return nil
+}
+
+// readFile decodes the XML content of the archive entry f into a Node tree.
+// It returns an error when the entry cannot be opened, read or parsed.
+func readFile(f *zip.File) (*Node, error) {
+	rc, err := f.Open()
+	if err != nil {
+		return nil, err
+	}
+	// Close is deferred only after Open succeeded; rc is not usable otherwise.
+	defer rc.Close()
+
+	b, err := ioutil.ReadAll(rc)
+	if err != nil {
+		return nil, err
+	}
+
+	var node Node
+	if err := xml.Unmarshal(b, &node); err != nil {
+		return nil, err
+	}
+	return &node, nil
+}
+
+// findFile returns the first entry of files whose name matches the
+// path.Match pattern target, or nil when none matches. A malformed pattern
+// matches nothing.
+func findFile(files []*zip.File, target string) *zip.File {
+	for i := 0; i < len(files); i++ {
+		candidate := files[i]
+		matched, _ := path.Match(target, candidate.Name)
+		if !matched {
+			continue
+		}
+		return candidate
+	}
+	return nil
+}
+
+// Docx2md converts the .docx file at path arg to Markdown and returns the text.
+//
+// When embed is true images are inlined as base64 data URIs; otherwise they
+// are written to disk under their relationship target path and linked from
+// the Markdown.
+//
+// It returns an error when the archive cannot be opened, when
+// word/_rels/document.xml.rels, word/numbering.xml or the document part cannot
+// be read or parsed, or when no entry matches word/document*.xml.
+func Docx2md(arg string, embed bool) (string, error) {
+	r, err := zip.OpenReader(arg)
+	if err != nil {
+		return "", err
+	}
+	defer r.Close()
+
+	var rels Relationships
+	var num Numbering
+
+	for _, f := range r.File {
+		switch f.Name {
+		case "word/_rels/document.xml.rels":
+			if err := decodeZipXML(f, &rels); err != nil {
+				return "", err
+			}
+		case "word/numbering.xml":
+			if err := decodeZipXML(f, &num); err != nil {
+				return "", err
+			}
+		}
+	}
+
+	f := findFile(r.File, "word/document*.xml")
+	if f == nil {
+		return "", errors.New("incorrect document")
+	}
+	node, err := readFile(f)
+	if err != nil {
+		return "", err
+	}
+
+	var buf bytes.Buffer
+	zf := &file{
+		r:     r,
+		rels:  rels,
+		num:   num,
+		embed: embed,
+		list:  make(map[string]int),
+	}
+	if err := zf.walk(node, &buf); err != nil {
+		return "", err
+	}
+
+	return buf.String(), nil
+}
+
+// decodeZipXML reads the archive entry f and unmarshals its XML into v.
+// The entry is closed before returning, so it is safe to call in a loop.
+func decodeZipXML(f *zip.File, v interface{}) error {
+	rc, err := f.Open()
+	if err != nil {
+		return err
+	}
+	defer rc.Close()
+
+	b, err := ioutil.ReadAll(rc)
+	if err != nil {
+		return err
+	}
+	return xml.Unmarshal(b, v)
+}

+ 1 - 1
views/book/index.tpl

@@ -465,7 +465,7 @@
                 'required': true,
                 'validateInitialCount': true,
                 "language" : "{{i18n $.Lang "common.upload_lang"}}",
-                'allowedFileExtensions': ['zip'],
+                'allowedFileExtensions': ['zip', 'docx'],
                 'msgPlaceholder' : '{{i18n $.Lang "message.file_type_placeholder"}}',
                 'elErrorContainer' : "#import-book-form-error-message",
                 'uploadExtraData' : function () {