envelope.go 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. package envelope
  2. import (
  3. "bufio"
  4. "bytes"
  5. "encoding/base64"
  6. "errors"
  7. "fmt"
  8. "github.com/sloonz/go-qprintable"
  9. "gopkg.in/iconv.v1"
  10. "io/ioutil"
  11. "net/textproto"
  12. "regexp"
  13. "strings"
  14. )
  15. // EmailAddress encodes an email address of the form `<user@host>`
  16. type EmailAddress struct {
  17. User string
  18. Host string
  19. }
  20. func (ep *EmailAddress) String() string {
  21. return fmt.Sprintf("%s@%s", ep.User, ep.Host)
  22. }
  23. func (ep *EmailAddress) IsEmpty() bool {
  24. return ep.User == "" && ep.Host == ""
  25. }
// Envelope represents a single SMTP message.
type Envelope struct {
	// RemoteAddress is the remote IP address.
	RemoteAddress string
	// Helo is the message sent in the EHLO command.
	Helo string
	// MailFrom is the sender.
	MailFrom EmailAddress
	// RcptTo lists the recipients.
	RcptTo []EmailAddress
	// Data stores the header and message body.
	Data bytes.Buffer
	// Subject stores the subject of the email, extracted and decoded after calling ParseHeaders().
	Subject string
	// TLS is true if the email was received using a TLS connection.
	TLS bool
	// Header stores the results from ParseHeaders(). ParseHeaders refuses to
	// run again once this field is non-nil.
	Header textproto.MIMEHeader
}
  45. // ParseHeaders parses the headers into Header field of the Envelope struct.
  46. // Data buffer must be full before calling.
  47. // It assumes that at most 30kb of email data can be a header
  48. // Decoding of encoding to UTF is only done on the Subject, where the result is assigned to the Subject field
  49. func (e *Envelope) ParseHeaders() error {
  50. var err error
  51. if e.Header != nil {
  52. return errors.New("Headers already parsed")
  53. }
  54. all := e.Data.Bytes()
  55. // find where the header ends, assuming that over 30 kb would be max
  56. max := 1024 * 30
  57. if len(all) < max {
  58. max = len(all) - 1
  59. }
  60. headerEnd := bytes.Index(all[:max], []byte("\n\n"))
  61. if headerEnd > -1 {
  62. headerReader := textproto.NewReader(bufio.NewReader(bytes.NewBuffer(all[0:headerEnd])))
  63. e.Header, err = headerReader.ReadMIMEHeader()
  64. if err != nil {
  65. // decode the subject
  66. if subject, ok := e.Header["Subject"]; ok {
  67. e.Subject = MimeHeaderDecode(subject[0])
  68. }
  69. }
  70. } else {
  71. err = errors.New("header not found")
  72. }
  73. return err
  74. }
  75. var mimeRegex, _ = regexp.Compile(`=\?(.+?)\?([QBqp])\?(.+?)\?=`)
  76. // Decode strings in Mime header format
  77. // eg. =?ISO-2022-JP?B?GyRCIVo9dztSOWJAOCVBJWMbKEI=?=
  78. func MimeHeaderDecode(str string) string {
  79. matched := mimeRegex.FindAllStringSubmatch(str, -1)
  80. var charset, encoding, payload string
  81. if matched != nil {
  82. for i := 0; i < len(matched); i++ {
  83. if len(matched[i]) > 2 {
  84. charset = matched[i][1]
  85. encoding = strings.ToUpper(matched[i][2])
  86. payload = matched[i][3]
  87. switch encoding {
  88. case "B":
  89. str = strings.Replace(
  90. str,
  91. matched[i][0],
  92. MailTransportDecode(payload, "base64", charset),
  93. 1)
  94. case "Q":
  95. str = strings.Replace(
  96. str,
  97. matched[i][0],
  98. MailTransportDecode(payload, "quoted-printable", charset),
  99. 1)
  100. }
  101. }
  102. }
  103. }
  104. return str
  105. }
  106. // decode from 7bit to 8bit UTF-8
  107. // encodingType can be "base64" or "quoted-printable"
  108. func MailTransportDecode(str string, encodingType string, charset string) string {
  109. if charset == "" {
  110. charset = "UTF-8"
  111. } else {
  112. charset = strings.ToUpper(charset)
  113. }
  114. if encodingType == "base64" {
  115. str = fromBase64(str)
  116. } else if encodingType == "quoted-printable" {
  117. str = fromQuotedP(str)
  118. }
  119. if charset != "UTF-8" {
  120. charset = fixCharset(charset)
  121. // iconv is pretty good at what it does
  122. if cd, err := iconv.Open("UTF-8", charset); err == nil {
  123. defer func() {
  124. cd.Close()
  125. if r := recover(); r != nil {
  126. //logln(1, fmt.Sprintf("Recovered in %v", r))
  127. }
  128. }()
  129. // eg. charset can be "ISO-2022-JP"
  130. return cd.ConvString(str)
  131. }
  132. }
  133. return str
  134. }
  135. func fromBase64(data string) string {
  136. buf := bytes.NewBufferString(data)
  137. decoder := base64.NewDecoder(base64.StdEncoding, buf)
  138. res, _ := ioutil.ReadAll(decoder)
  139. return string(res)
  140. }
  141. func fromQuotedP(data string) string {
  142. buf := bytes.NewBufferString(data)
  143. decoder := qprintable.NewDecoder(qprintable.BinaryEncoding, buf)
  144. res, _ := ioutil.ReadAll(decoder)
  145. return string(res)
  146. }
  147. var charsetRegex, _ = regexp.Compile(`[_:.\/\\]`)
  148. func fixCharset(charset string) string {
  149. fixed_charset := charsetRegex.ReplaceAllString(charset, "-")
  150. // Fix charset
  151. // borrowed from http://squirrelmail.svn.sourceforge.net/viewvc/squirrelmail/trunk/squirrelmail/include/languages.php?revision=13765&view=markup
  152. // OE ks_c_5601_1987 > cp949
  153. fixed_charset = strings.Replace(fixed_charset, "ks-c-5601-1987", "cp949", -1)
  154. // Moz x-euc-tw > euc-tw
  155. fixed_charset = strings.Replace(fixed_charset, "x-euc", "euc", -1)
  156. // Moz x-windows-949 > cp949
  157. fixed_charset = strings.Replace(fixed_charset, "x-windows_", "cp", -1)
  158. // windows-125x and cp125x charsets
  159. fixed_charset = strings.Replace(fixed_charset, "windows-", "cp", -1)
  160. // ibm > cp
  161. fixed_charset = strings.Replace(fixed_charset, "ibm", "cp", -1)
  162. // iso-8859-8-i -> iso-8859-8
  163. fixed_charset = strings.Replace(fixed_charset, "iso-8859-8-i", "iso-8859-8", -1)
  164. if charset != fixed_charset {
  165. return fixed_charset
  166. }
  167. return charset
  168. }