html.go 1.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556
  1. package utils
  2. import (
  3. "regexp"
  4. "strings"
  5. )
  6. func StripTags(s string) string {
  7. //将HTML标签全转换成小写
  8. re, _ := regexp.Compile("\\<[\\S\\s]+?\\>")
  9. src := re.ReplaceAllStringFunc(s, strings.ToLower)
  10. //去除STYLE
  11. re, _ = regexp.Compile("\\<style[\\S\\s]+?\\</style\\>")
  12. src = re.ReplaceAllString(src, "")
  13. //去除SCRIPT
  14. re, _ = regexp.Compile("\\<script[\\S\\s]+?\\</script\\>")
  15. src = re.ReplaceAllString(src, "")
  16. //去除所有尖括号内的HTML代码,并换成换行符
  17. re, _ = regexp.Compile("\\<[\\S\\s]+?\\>")
  18. src = re.ReplaceAllString(src, "\n")
  19. //去除连续的换行符
  20. re, _ = regexp.Compile("\\s{2,}")
  21. src = re.ReplaceAllString(src, "\n")
  22. return src
  23. }
  24. //自动提取文章摘要
  25. func AutoSummary(body string,l int) string {
  26. //匹配图片,如果图片语法是在代码块中,这里同样会处理
  27. re := regexp.MustCompile(`<p>(.*?)</p>`)
  28. contents := re.FindAllString(body, -1)
  29. if len(contents) <= 0 {
  30. return ""
  31. }
  32. content := ""
  33. for _,s := range contents {
  34. b := strings.Replace(StripTags(s),"\n","", -1)
  35. if l <= 0 {
  36. break
  37. }
  38. l = l - len([]rune(b))
  39. content += b
  40. }
  41. return content
  42. }