envelope.go 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337
  1. package mail
  2. import (
  3. "bufio"
  4. "bytes"
  5. "crypto/md5"
  6. "encoding/base64"
  7. "errors"
  8. "fmt"
  9. "gopkg.in/iconv.v1"
  10. "io"
  11. "io/ioutil"
  12. "mime/quotedprintable"
  13. "net/mail"
  14. "net/textproto"
  15. "regexp"
  16. "strings"
  17. "sync"
  18. "time"
  19. )
// maxHeaderChunk is the upper bound, in bytes, on how much of the start of the
// message data ParseHeaders scans while looking for the end of the header
// block. The value is 3 KiB plus one byte.
const maxHeaderChunk = 1 + (3 << 10) // 3KB
  21. // Address encodes an email address of the form `<user@host>`
  22. type Address struct {
  23. User string
  24. Host string
  25. }
  26. func (ep *Address) String() string {
  27. return fmt.Sprintf("%s@%s", ep.User, ep.Host)
  28. }
  29. func (ep *Address) IsEmpty() bool {
  30. return ep.User == "" && ep.Host == ""
  31. }
  32. var ap = mail.AddressParser{}
  33. // NewAddress takes a string of an RFC 5322 address of the
  34. // form "Gogh Fir <[email protected]>" or "[email protected]".
  35. func NewAddress(str string) (Address, error) {
  36. a, err := ap.Parse(str)
  37. if err != nil {
  38. return Address{}, err
  39. }
  40. pos := strings.Index(a.Address, "@")
  41. if pos > 0 {
  42. return Address{
  43. User: a.Address[0:pos],
  44. Host: a.Address[pos+1:],
  45. },
  46. nil
  47. }
  48. return Address{}, errors.New("invalid address")
  49. }
// Envelope represents a single SMTP message.
// It embeds a sync.Mutex, so it must be passed around by pointer.
type Envelope struct {
	// Remote IP address
	RemoteIP string
	// Message sent in EHLO command
	Helo string
	// Sender
	MailFrom Address
	// Recipients
	RcptTo []Address
	// Data stores the header and message body
	Data bytes.Buffer
	// Subject stores the subject of the email, extracted and decoded after calling ParseHeaders()
	Subject string
	// TLS is true if the email was received using a TLS connection
	TLS bool
	// Header stores the results from ParseHeaders()
	Header textproto.MIMEHeader
	// Values hold the values generated when processing the envelope by the backend
	Values map[string]interface{}
	// Hashes of each email on the rcpt
	Hashes []string
	// Additional delivery header that may be added; it is emitted before Data
	// by NewReader() and String()
	DeliveryHeader string
	// Email(s) will be queued with this id
	QueuedId string
	// When locked, it means that the envelope is being processed by the backend
	sync.Mutex
}
  79. func NewEnvelope(remoteAddr string, clientID uint64) *Envelope {
  80. return &Envelope{
  81. RemoteIP: remoteAddr,
  82. Values: make(map[string]interface{}),
  83. QueuedId: queuedID(clientID),
  84. }
  85. }
  86. func queuedID(clientID uint64) string {
  87. return fmt.Sprintf("%x", md5.Sum([]byte(string(time.Now().Unix())+string(clientID))))
  88. }
  89. // ParseHeaders parses the headers into Header field of the Envelope struct.
  90. // Data buffer must be full before calling.
  91. // It assumes that at most 30kb of email data can be a header
  92. // Decoding of encoding to UTF is only done on the Subject, where the result is assigned to the Subject field
  93. func (e *Envelope) ParseHeaders() error {
  94. var err error
  95. if e.Header != nil {
  96. return errors.New("headers already parsed")
  97. }
  98. buf := bytes.NewBuffer(e.Data.Bytes())
  99. // find where the header ends, assuming that over 30 kb would be max
  100. max := maxHeaderChunk
  101. if buf.Len() < max {
  102. max = buf.Len()
  103. }
  104. // read in the chunk which we'll scan for the header
  105. chunk := make([]byte, max)
  106. buf.Read(chunk)
  107. headerEnd := strings.Index(string(chunk), "\n\n") // the first two new-lines chars are the End Of Header
  108. if headerEnd > -1 {
  109. header := chunk[0:headerEnd]
  110. headerReader := textproto.NewReader(bufio.NewReader(bytes.NewBuffer(header)))
  111. e.Header, err = headerReader.ReadMIMEHeader()
  112. if err != nil {
  113. // decode the subject
  114. if subject, ok := e.Header["Subject"]; ok {
  115. e.Subject = MimeHeaderDecode(subject[0])
  116. }
  117. }
  118. } else {
  119. err = errors.New("header not found")
  120. }
  121. return err
  122. }
  123. // Len returns the number of bytes that would be in the reader returned by NewReader()
  124. func (e *Envelope) Len() int {
  125. return len(e.DeliveryHeader) + e.Data.Len()
  126. }
  127. // Returns a new reader for reading the email contents, including the delivery headers
  128. func (e *Envelope) NewReader() io.Reader {
  129. return io.MultiReader(
  130. strings.NewReader(e.DeliveryHeader),
  131. bytes.NewReader(e.Data.Bytes()),
  132. )
  133. }
  134. // String converts the email to string.
  135. // Typically, you would want to use the compressor guerrilla.Processor for more efficiency, or use NewReader
  136. func (e *Envelope) String() string {
  137. return e.DeliveryHeader + e.Data.String()
  138. }
  139. // ResetTransaction is called when the transaction is reset (keeping the connection open)
  140. func (e *Envelope) ResetTransaction() {
  141. // ensure not processing by the backend, will only get lock if finished, otherwise block
  142. e.Lock()
  143. // got the lock, it means processing finished
  144. e.Unlock()
  145. e.MailFrom = Address{}
  146. e.RcptTo = []Address{}
  147. // reset the data buffer, keep it allocated
  148. e.Data.Reset()
  149. // todo: these are probably good candidates for buffers / use sync.Pool (after profiling)
  150. e.Subject = ""
  151. e.Header = nil
  152. e.Hashes = make([]string, 0)
  153. e.DeliveryHeader = ""
  154. e.Values = make(map[string]interface{})
  155. }
  156. // Seed is called when used with a new connection, once it's accepted
  157. func (e *Envelope) Reseed(RemoteIP string, clientID uint64) {
  158. e.RemoteIP = RemoteIP
  159. e.QueuedId = queuedID(clientID)
  160. e.Helo = ""
  161. e.TLS = false
  162. }
  163. // PushRcpt adds a recipient email address to the envelope
  164. func (e *Envelope) PushRcpt(addr Address) {
  165. e.RcptTo = append(e.RcptTo, addr)
  166. }
  167. // Pop removes the last email address that was pushed to the envelope
  168. func (e *Envelope) PopRcpt() Address {
  169. ret := e.RcptTo[len(e.RcptTo)-1]
  170. e.RcptTo = e.RcptTo[:len(e.RcptTo)-1]
  171. return ret
  172. }
  173. var mimeRegex, _ = regexp.Compile(`=\?(.+?)\?([QBqp])\?(.+?)\?=`)
  174. // Decode strings in Mime header format
  175. // eg. =?ISO-2022-JP?B?GyRCIVo9dztSOWJAOCVBJWMbKEI=?=
  176. // This function uses GNU iconv under the hood, for more charset support than in Go's library
  177. func MimeHeaderDecode(str string) string {
  178. matched := mimeRegex.FindAllStringSubmatch(str, -1)
  179. var charset, encoding, payload string
  180. if matched != nil {
  181. for i := 0; i < len(matched); i++ {
  182. if len(matched[i]) > 2 {
  183. charset = matched[i][1]
  184. encoding = strings.ToUpper(matched[i][2])
  185. payload = matched[i][3]
  186. switch encoding {
  187. case "B":
  188. str = strings.Replace(
  189. str,
  190. matched[i][0],
  191. MailTransportDecode(payload, "base64", charset),
  192. 1)
  193. case "Q":
  194. str = strings.Replace(
  195. str,
  196. matched[i][0],
  197. MailTransportDecode(payload, "quoted-printable", charset),
  198. 1)
  199. }
  200. }
  201. }
  202. }
  203. return str
  204. }
  205. // decode from 7bit to 8bit UTF-8
  206. // encodingType can be "base64" or "quoted-printable"
  207. func MailTransportDecode(str string, encodingType string, charset string) string {
  208. if charset == "" {
  209. charset = "UTF-8"
  210. } else {
  211. charset = strings.ToUpper(charset)
  212. }
  213. if encodingType == "base64" {
  214. str = fromBase64(str)
  215. } else if encodingType == "quoted-printable" {
  216. str = fromQuotedP(str)
  217. }
  218. if charset != "UTF-8" {
  219. charset = fixCharset(charset)
  220. // iconv is pretty good at what it does
  221. if cd, err := iconv.Open("UTF-8", charset); err == nil {
  222. defer func() {
  223. cd.Close()
  224. if r := recover(); r != nil {
  225. //logln(1, fmt.Sprintf("Recovered in %v", r))
  226. }
  227. }()
  228. // eg. charset can be "ISO-2022-JP"
  229. return cd.ConvString(str)
  230. }
  231. }
  232. return str
  233. }
  234. func fromBase64(data string) string {
  235. buf := bytes.NewBufferString(data)
  236. decoder := base64.NewDecoder(base64.StdEncoding, buf)
  237. res, _ := ioutil.ReadAll(decoder)
  238. return string(res)
  239. }
  240. func fromQuotedP(data string) string {
  241. res, _ := ioutil.ReadAll(quotedprintable.NewReader(strings.NewReader(data)))
  242. return string(res)
  243. }
  244. var charsetRegex, _ = regexp.Compile(`[_:.\/\\]`)
  245. func fixCharset(charset string) string {
  246. fixed_charset := charsetRegex.ReplaceAllString(charset, "-")
  247. // Fix charset
  248. // borrowed from http://squirrelmail.svn.sourceforge.net/viewvc/squirrelmail/trunk/squirrelmail/include/languages.php?revision=13765&view=markup
  249. // OE ks_c_5601_1987 > cp949
  250. fixed_charset = strings.Replace(fixed_charset, "ks-c-5601-1987", "cp949", -1)
  251. // Moz x-euc-tw > euc-tw
  252. fixed_charset = strings.Replace(fixed_charset, "x-euc", "euc", -1)
  253. // Moz x-windows-949 > cp949
  254. fixed_charset = strings.Replace(fixed_charset, "x-windows_", "cp", -1)
  255. // windows-125x and cp125x charsets
  256. fixed_charset = strings.Replace(fixed_charset, "windows-", "cp", -1)
  257. // ibm > cp
  258. fixed_charset = strings.Replace(fixed_charset, "ibm", "cp", -1)
  259. // iso-8859-8-i -> iso-8859-8
  260. fixed_charset = strings.Replace(fixed_charset, "iso-8859-8-i", "iso-8859-8", -1)
  261. if charset != fixed_charset {
  262. return fixed_charset
  263. }
  264. return charset
  265. }
// Pool is a pool of reusable envelopes that also caps how many envelopes
// may be borrowed at the same time.
type Pool struct {
	// envelopes that are ready to be borrowed
	pool chan *Envelope
	// semaphore to control number of maximum borrowed envelopes
	sem chan bool
}
  273. func NewPool(poolSize int) *Pool {
  274. return &Pool{
  275. pool: make(chan *Envelope, poolSize),
  276. sem: make(chan bool, poolSize),
  277. }
  278. }
  279. func (p *Pool) Borrow(remoteAddr string, clientID uint64) *Envelope {
  280. var e *Envelope
  281. p.sem <- true // block the envelope until more room
  282. select {
  283. case e = <-p.pool:
  284. e.Reseed(remoteAddr, clientID)
  285. default:
  286. e = NewEnvelope(remoteAddr, clientID)
  287. }
  288. return e
  289. }
// Return returns an envelope back to the envelope pool and frees one slot of
// the borrow semaphore.
// Make sure that envelope finished processing before calling this
func (p *Pool) Return(e *Envelope) {
	// non-blocking send: never wait for room in the pool
	select {
	case p.pool <- e:
		//placed envelope back in pool
	default:
		// pool is full, discard it
	}
	// take a value off the semaphore to make room for more envelopes;
	// this receive blocks if the semaphore is empty
	<-p.sem
}