DocumentModel.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396
  1. package models
  2. import (
  3. "time"
  4. "fmt"
  5. "strconv"
  6. "bytes"
  7. "os"
  8. "path/filepath"
  9. "strings"
  10. "github.com/PuerkitoBio/goquery"
  11. "github.com/beego/beego/v2/client/orm"
  12. "github.com/beego/beego/v2/core/logs"
  13. "github.com/beego/beego/v2/server/web"
  14. "github.com/mindoc-org/mindoc/cache"
  15. "github.com/mindoc-org/mindoc/conf"
  16. "github.com/mindoc-org/mindoc/utils"
  17. )
// Document is the ORM model of a single document; the struct tags map each
// field to a column of the table named by TableName.
type Document struct {
	// DocumentId is the auto-increment primary key.
	DocumentId int `orm:"pk;auto;unique;column(document_id)" json:"doc_id"`
	// DocumentName is the document title.
	DocumentName string `orm:"column(document_name);size(500)" json:"doc_name"`
	// Identify is the unique identifier of the document.
	Identify string `orm:"column(identify);size(100);index;null;default(null)" json:"identify"`
	// BookId is the ID of the book (project) the document belongs to.
	BookId int `orm:"column(book_id);type(int);index" json:"book_id"`
	// ParentId is the ID of the parent document; the column defaults to 0.
	ParentId int `orm:"column(parent_id);type(int);index;default(0)" json:"parent_id"`
	// OrderSort is the sort key used when listing the documents of a book.
	OrderSort int `orm:"column(order_sort);default(0);type(int);index" json:"order_sort"`
	// Markdown is the document in Markdown format.
	Markdown string `orm:"column(markdown);type(text);null" json:"markdown"`
	// Release is the HTML content after publishing.
	Release string `orm:"column(release);type(text);null" json:"release"`
	// Content is the unpublished HTML content.
	Content string `orm:"column(content);type(text);null" json:"content"`
	// CreateTime is filled in by the ORM when the row is first inserted (auto_now_add).
	CreateTime time.Time `orm:"column(create_time);type(datetime);auto_now_add" json:"create_time"`
	// MemberId is the ID of the member shown as the author in the document footer.
	MemberId int `orm:"column(member_id);type(int)" json:"member_id"`
	// ModifyTime is refreshed by the ORM on every save (auto_now).
	ModifyTime time.Time `orm:"column(modify_time);type(datetime);auto_now" json:"modify_time"`
	// ModifyAt is the ID of the member shown as the last editor; it is omitted from JSON.
	ModifyAt int `orm:"column(modify_at);type(int)" json:"-"`
	// Version is initialised to the current Unix time by NewDocument.
	Version int64 `orm:"column(version);type(bigint);" json:"version"`
	// IsOpen tells whether the sub-directory is expanded: 0 = no, 1 = yes,
	// 2 = space node that expands the next level when clicked.
	IsOpen int `orm:"column(is_open);type(int);default(0)" json:"is_open"`
	// ViewCount is the number of times the document has been read.
	ViewCount int `orm:"column(view_count);type(int)" json:"view_count"`
	// AttachList holds the document's attachments; it is not persisted (orm:"-").
	AttachList []*Attachment `orm:"-" json:"attach"`
}
  43. // 多字段唯一键
  44. func (item *Document) TableUnique() [][]string {
  45. return [][]string{
  46. []string{"book_id", "identify"},
  47. }
  48. }
  49. // TableName 获取对应数据库表名.
  50. func (item *Document) TableName() string {
  51. return "documents"
  52. }
  53. // TableEngine 获取数据使用的引擎.
  54. func (item *Document) TableEngine() string {
  55. return "INNODB"
  56. }
  57. func (item *Document) TableNameWithPrefix() string {
  58. return conf.GetDatabasePrefix() + item.TableName()
  59. }
  60. func NewDocument() *Document {
  61. return &Document{
  62. Version: time.Now().Unix(),
  63. }
  64. }
  65. //根据文档ID查询指定文档.
  66. func (item *Document) Find(id int) (*Document, error) {
  67. if id <= 0 {
  68. return item, ErrInvalidParameter
  69. }
  70. o := orm.NewOrm()
  71. err := o.QueryTable(item.TableNameWithPrefix()).Filter("document_id", id).One(item)
  72. if err == orm.ErrNoRows {
  73. return item, ErrDataNotExist
  74. }
  75. return item, nil
  76. }
  77. //插入和更新文档.
  78. func (item *Document) InsertOrUpdate(cols ...string) error {
  79. o := orm.NewOrm()
  80. item.DocumentName = utils.StripTags(item.DocumentName)
  81. var err error
  82. if item.DocumentId > 0 {
  83. _, err = o.Update(item, cols...)
  84. } else {
  85. if item.Identify == "" {
  86. book := NewBook()
  87. identify := "docs"
  88. if err := o.QueryTable(book.TableNameWithPrefix()).Filter("book_id", item.BookId).One(book, "identify"); err == nil {
  89. identify = book.Identify
  90. }
  91. item.Identify = fmt.Sprintf("%s-%s", identify, strconv.FormatInt(time.Now().UnixNano(), 32))
  92. }
  93. if item.OrderSort == 0 {
  94. sort, _ := o.QueryTable(item.TableNameWithPrefix()).Filter("book_id", item.BookId).Filter("parent_id", item.ParentId).Count()
  95. item.OrderSort = int(sort) + 1
  96. }
  97. _, err = o.Insert(item)
  98. NewBook().ResetDocumentNumber(item.BookId)
  99. }
  100. if err != nil {
  101. return err
  102. }
  103. return nil
  104. }
  105. //根据文档识别编号和项目id获取一篇文档
  106. func (item *Document) FindByIdentityFirst(identify string, bookId int) (*Document, error) {
  107. o := orm.NewOrm()
  108. err := o.QueryTable(item.TableNameWithPrefix()).Filter("book_id", bookId).Filter("identify", identify).One(item)
  109. return item, err
  110. }
  111. //递归删除一个文档.
  112. func (item *Document) RecursiveDocument(docId int) error {
  113. o := orm.NewOrm()
  114. if doc, err := item.Find(docId); err == nil {
  115. o.Delete(doc)
  116. NewDocumentHistory().Clear(doc.DocumentId)
  117. }
  118. var maps []orm.Params
  119. _, err := o.Raw("SELECT document_id FROM " + item.TableNameWithPrefix() + " WHERE parent_id=" + strconv.Itoa(docId)).Values(&maps)
  120. if err != nil {
  121. logs.Error("RecursiveDocument => ", err)
  122. return err
  123. }
  124. for _, param := range maps {
  125. if docId, ok := param["document_id"].(string); ok {
  126. id, _ := strconv.Atoi(docId)
  127. o.QueryTable(item.TableNameWithPrefix()).Filter("document_id", id).Delete()
  128. item.RecursiveDocument(id)
  129. }
  130. }
  131. return nil
  132. }
  133. //将文档写入缓存
  134. func (item *Document) PutToCache() {
  135. go func(m Document) {
  136. if m.Identify == "" {
  137. if err := cache.Put("Document.Id."+strconv.Itoa(m.DocumentId), m, time.Second*3600); err != nil {
  138. logs.Info("文档缓存失败:", m.DocumentId)
  139. }
  140. } else {
  141. if err := cache.Put(fmt.Sprintf("Document.BookId.%d.Identify.%s", m.BookId, m.Identify), m, time.Second*3600); err != nil {
  142. logs.Info("文档缓存失败:", m.DocumentId)
  143. }
  144. }
  145. }(*item)
  146. }
  147. //清除缓存
  148. func (item *Document) RemoveCache() {
  149. go func(m Document) {
  150. cache.Put("Document.Id."+strconv.Itoa(m.DocumentId), m, time.Second*3600)
  151. if m.Identify != "" {
  152. cache.Put(fmt.Sprintf("Document.BookId.%d.Identify.%s", m.BookId, m.Identify), m, time.Second*3600)
  153. }
  154. }(*item)
  155. }
  156. //从缓存获取
  157. func (item *Document) FromCacheById(id int) (*Document, error) {
  158. if err := cache.Get("Document.Id."+strconv.Itoa(id), &item); err == nil && item.DocumentId > 0 {
  159. logs.Info("从缓存中获取文档信息成功 ->", item.DocumentId)
  160. return item, nil
  161. }
  162. if item.DocumentId > 0 {
  163. item.PutToCache()
  164. }
  165. item, err := item.Find(id)
  166. if err == nil {
  167. item.PutToCache()
  168. }
  169. return item, err
  170. }
  171. //根据文档标识从缓存中查询文档
  172. func (item *Document) FromCacheByIdentify(identify string, bookId int) (*Document, error) {
  173. key := fmt.Sprintf("Document.BookId.%d.Identify.%s", bookId, identify)
  174. if err := cache.Get(key, item); err == nil && item.DocumentId > 0 {
  175. logs.Info("从缓存中获取文档信息成功 ->", key)
  176. return item, nil
  177. }
  178. defer func() {
  179. if item.DocumentId > 0 {
  180. item.PutToCache()
  181. }
  182. }()
  183. return item.FindByIdentityFirst(identify, bookId)
  184. }
  185. //根据项目ID查询文档列表.
  186. func (item *Document) FindListByBookId(bookId int) (docs []*Document, err error) {
  187. o := orm.NewOrm()
  188. _, err = o.QueryTable(item.TableNameWithPrefix()).Filter("book_id", bookId).OrderBy("order_sort").All(&docs)
  189. return
  190. }
  191. //判断文章是否存在
  192. func (item *Document) IsExist(documentId int) bool {
  193. o := orm.NewOrm()
  194. return o.QueryTable(item.TableNameWithPrefix()).Filter("document_id", documentId).Exist()
  195. }
  196. //发布单篇文档
  197. func (item *Document) ReleaseContent() error {
  198. item.Release = strings.TrimSpace(item.Content)
  199. err := item.Processor().InsertOrUpdate("release")
  200. if err != nil {
  201. logs.Error(fmt.Sprintf("发布失败 -> %+v", item), err)
  202. return err
  203. }
  204. //当文档发布后,需要清除已缓存的转换文档和文档缓存
  205. item.RemoveCache()
  206. if err := os.RemoveAll(filepath.Join(conf.WorkingDirectory, "uploads", "books", strconv.Itoa(item.BookId))); err != nil {
  207. logs.Error("删除已缓存的文档目录失败 -> ", filepath.Join(conf.WorkingDirectory, "uploads", "books", strconv.Itoa(item.BookId)))
  208. return err
  209. }
  210. return nil
  211. }
  212. //处理文档的外链,附件,底部编辑信息等.
  213. func (item *Document) Processor() *Document {
  214. if item.Release != "" {
  215. item.Release = utils.SafetyProcessor(item.Release)
  216. //安全过滤,移除危险标签和属性
  217. if docQuery, err := goquery.NewDocumentFromReader(bytes.NewBufferString(item.Release)); err == nil {
  218. //处理附件
  219. if selector := docQuery.Find("div.attach-list").First(); selector.Size() <= 0 {
  220. //处理附件
  221. attachList, err := NewAttachment().FindListByDocumentId(item.DocumentId)
  222. if err == nil && len(attachList) > 0 {
  223. content := bytes.NewBufferString("<div class=\"attach-list\"><strong>附件</strong><ul>")
  224. for _, attach := range attachList {
  225. if strings.HasPrefix(attach.HttpPath, "/") {
  226. attach.HttpPath = strings.TrimSuffix(conf.BaseUrl, "/") + attach.HttpPath
  227. }
  228. li := fmt.Sprintf("<li><a href=\"%s\" target=\"_blank\" title=\"%s\">%s</a></li>", attach.HttpPath, attach.FileName, attach.FileName)
  229. content.WriteString(li)
  230. }
  231. content.WriteString("</ul></div>")
  232. if docQuery == nil {
  233. docQuery, err = goquery.NewDocumentFromReader(content)
  234. } else {
  235. if selector := docQuery.Find("div.wiki-bottom").First(); selector.Size() > 0 {
  236. selector.BeforeHtml(content.String())
  237. } else if selector := docQuery.Find("div.markdown-article").First(); selector.Size() > 0 {
  238. selector.AppendHtml(content.String())
  239. } else if selector := docQuery.Find("article.markdown-article-inner").First(); selector.Size() > 0 {
  240. selector.AppendHtml(content.String())
  241. }
  242. }
  243. }
  244. }
  245. //处理了文档底部信息
  246. if selector := docQuery.Find("div.wiki-bottom").First(); selector.Size() <= 0 && item.MemberId > 0 {
  247. //处理文档结尾信息
  248. docCreator, err := NewMember().Find(item.MemberId, "real_name", "account")
  249. release := "<div class=\"wiki-bottom\">"
  250. release += "作者:"
  251. if err == nil && docCreator != nil {
  252. if docCreator.RealName != "" {
  253. release += docCreator.RealName
  254. } else {
  255. release += docCreator.Account
  256. }
  257. }
  258. release += " &nbsp;创建时间:" + item.CreateTime.Local().Format("2006-01-02 15:04") + "<br>"
  259. if item.ModifyAt > 0 {
  260. docModify, err := NewMember().Find(item.ModifyAt, "real_name", "account")
  261. if err == nil {
  262. if docModify.RealName != "" {
  263. release += "最后编辑:" + docModify.RealName
  264. } else {
  265. release += "最后编辑:" + docModify.Account
  266. }
  267. }
  268. }
  269. release += " &nbsp;更新时间:" + item.ModifyTime.Local().Format("2006-01-02 15:04") + "<br>"
  270. release += "</div>"
  271. if selector := docQuery.Find("div.markdown-article").First(); selector.Size() > 0 {
  272. selector.AppendHtml(release)
  273. } else if selector := docQuery.Find("article.markdown-article-inner").First(); selector.Size() > 0 {
  274. selector.First().AppendHtml(release)
  275. }
  276. }
  277. cdnimg, _ := web.AppConfig.String("cdnimg")
  278. docQuery.Find("img").Each(func(i int, selection *goquery.Selection) {
  279. if src, ok := selection.Attr("src"); ok {
  280. src = strings.TrimSpace(strings.ToLower(src))
  281. //过滤掉没有链接的图片标签
  282. if src == "" || strings.HasPrefix(src, "data:text/html") {
  283. selection.Remove()
  284. return
  285. }
  286. //设置图片为CDN地址
  287. if cdnimg != "" && strings.HasPrefix(src, "/uploads/") {
  288. selection.SetAttr("src", utils.JoinURI(cdnimg, src))
  289. }
  290. }
  291. selection.RemoveAttr("onerror").RemoveAttr("onload")
  292. })
  293. //过滤A标签的非法连接
  294. docQuery.Find("a").Each(func(i int, selection *goquery.Selection) {
  295. if val, exists := selection.Attr("href"); exists {
  296. if val == "" {
  297. selection.SetAttr("href", "#")
  298. return
  299. }
  300. val = strings.Replace(strings.ToLower(val), " ", "", -1)
  301. //移除危险脚本链接
  302. if strings.HasPrefix(val, "data:text/html") ||
  303. strings.HasPrefix(val, "vbscript:") ||
  304. strings.HasPrefix(val, "&#106;avascript:") ||
  305. strings.HasPrefix(val, "javascript:") {
  306. selection.SetAttr("href", "#")
  307. }
  308. }
  309. //移除所有 onerror 属性
  310. selection.RemoveAttr("onerror").RemoveAttr("onload").RemoveAttr("onclick")
  311. })
  312. docQuery.Find("script").Remove()
  313. docQuery.Find("link").Remove()
  314. docQuery.Find("vbscript").Remove()
  315. if html, err := docQuery.Html(); err == nil {
  316. item.Release = strings.TrimSuffix(strings.TrimPrefix(strings.TrimSpace(html), "<html><head></head><body>"), "</body></html>")
  317. }
  318. }
  319. }
  320. return item
  321. }
  322. // 增加阅读次数
  323. func (item *Document) IncrViewCount(id int) {
  324. o := orm.NewOrm()
  325. o.QueryTable(item.TableNameWithPrefix()).Filter("document_id", id).Update(orm.Params{
  326. "view_count": orm.ColValue(orm.ColAdd, 1),
  327. })
  328. }