wkhtmltopdf.go 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. // Package wkhtmltopdf contains wrappers around the wkhtmltopdf commandline tool
  2. package wkhtmltopdf
  3. import (
  4. "bytes"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "io/ioutil"
  9. "os"
  10. "os/exec"
  11. "path/filepath"
  12. "strings"
  13. )
  14. var binPath string //the cached paths as used by findPath()
  15. // SetPath sets the path to wkhtmltopdf
  16. func SetPath(path string) {
  17. binPath = path
  18. }
  19. // GetPath gets the path to wkhtmltopdf
  20. func GetPath() string {
  21. return binPath
  22. }
  23. // Page is the input struct for each page
  24. type Page struct {
  25. Input string
  26. PageOptions
  27. }
  28. // InputFile returns the input string and is part of the page interface
  29. func (p *Page) InputFile() string {
  30. return p.Input
  31. }
  32. // Args returns the argument slice and is part of the page interface
  33. func (p *Page) Args() []string {
  34. return p.PageOptions.Args()
  35. }
  36. // Reader returns the io.Reader and is part of the page interface
  37. func (p *Page) Reader() io.Reader {
  38. return nil
  39. }
  40. // NewPage creates a new input page from a local or web resource (filepath or URL)
  41. func NewPage(input string) *Page {
  42. return &Page{
  43. Input: input,
  44. PageOptions: NewPageOptions(),
  45. }
  46. }
  47. // PageReader is one input page (a HTML document) that is read from an io.Reader
  48. // You can add only one Page from a reader
  49. type PageReader struct {
  50. Input io.Reader
  51. PageOptions
  52. }
  53. // InputFile returns the input string and is part of the page interface
  54. func (pr *PageReader) InputFile() string {
  55. return "-"
  56. }
  57. // Args returns the argument slice and is part of the page interface
  58. func (pr *PageReader) Args() []string {
  59. return pr.PageOptions.Args()
  60. }
  61. //Reader returns the io.Reader and is part of the page interface
  62. func (pr *PageReader) Reader() io.Reader {
  63. return pr.Input
  64. }
  65. // NewPageReader creates a new PageReader from an io.Reader
  66. func NewPageReader(input io.Reader) *PageReader {
  67. return &PageReader{
  68. Input: input,
  69. PageOptions: NewPageOptions(),
  70. }
  71. }
  72. type page interface {
  73. Args() []string
  74. InputFile() string
  75. Reader() io.Reader
  76. }
  77. // PageOptions are options for each input page
  78. type PageOptions struct {
  79. pageOptions
  80. headerAndFooterOptions
  81. }
  82. // Args returns the argument slice
  83. func (po *PageOptions) Args() []string {
  84. return append(append([]string{}, po.pageOptions.Args()...), po.headerAndFooterOptions.Args()...)
  85. }
  86. // NewPageOptions returns a new PageOptions struct with all options
  87. func NewPageOptions() PageOptions {
  88. return PageOptions{
  89. pageOptions: newPageOptions(),
  90. headerAndFooterOptions: newHeaderAndFooterOptions(),
  91. }
  92. }
  93. // cover page
  94. type cover struct {
  95. Input string
  96. pageOptions
  97. }
  98. // table of contents
  99. type toc struct {
  100. Include bool
  101. allTocOptions
  102. }
  103. type allTocOptions struct {
  104. pageOptions
  105. tocOptions
  106. }
  107. // PDFGenerator is the main wkhtmltopdf struct, always use NewPDFGenerator to obtain a new PDFGenerator struct
  108. type PDFGenerator struct {
  109. globalOptions
  110. outlineOptions
  111. Cover cover
  112. TOC toc
  113. OutputFile string //filename to write to, default empty (writes to internal buffer)
  114. binPath string
  115. outbuf bytes.Buffer
  116. pages []page
  117. }
  118. //Args returns the commandline arguments as a string slice
  119. func (pdfg *PDFGenerator) Args() []string {
  120. args := []string{}
  121. args = append(args, pdfg.globalOptions.Args()...)
  122. args = append(args, pdfg.outlineOptions.Args()...)
  123. if pdfg.Cover.Input != "" {
  124. args = append(args, "cover")
  125. args = append(args, pdfg.Cover.Input)
  126. args = append(args, pdfg.Cover.pageOptions.Args()...)
  127. }
  128. if pdfg.TOC.Include {
  129. args = append(args, "toc")
  130. args = append(args, pdfg.TOC.pageOptions.Args()...)
  131. args = append(args, pdfg.TOC.tocOptions.Args()...)
  132. }
  133. for _, page := range pdfg.pages {
  134. args = append(args, "page")
  135. args = append(args, page.InputFile())
  136. args = append(args, page.Args()...)
  137. }
  138. if pdfg.OutputFile != "" {
  139. args = append(args, pdfg.OutputFile)
  140. } else {
  141. args = append(args, "-")
  142. }
  143. return args
  144. }
  145. // ArgString returns Args as a single string
  146. func (pdfg *PDFGenerator) ArgString() string {
  147. return strings.Join(pdfg.Args(), " ")
  148. }
  149. // AddPage adds a new input page to the document.
  150. // A page is an input HTML page, it can span multiple pages in the output document.
  151. // It is a Page when read from file or URL or a PageReader when read from memory.
  152. func (pdfg *PDFGenerator) AddPage(p page) {
  153. pdfg.pages = append(pdfg.pages, p)
  154. }
  155. // SetPages resets all pages
  156. func (pdfg *PDFGenerator) SetPages(p []page) {
  157. pdfg.pages = p
  158. }
  159. // Buffer returns the embedded output buffer used if OutputFile is empty
  160. func (pdfg *PDFGenerator) Buffer() *bytes.Buffer {
  161. return &pdfg.outbuf
  162. }
  163. // Bytes returns the output byte slice from the output buffer used if OutputFile is empty
  164. func (pdfg *PDFGenerator) Bytes() []byte {
  165. return pdfg.outbuf.Bytes()
  166. }
  167. // WriteFile writes the contents of the output buffer to a file
  168. func (pdfg *PDFGenerator) WriteFile(filename string) error {
  169. return ioutil.WriteFile(filename, pdfg.Bytes(), 0666)
  170. }
  171. //findPath finds the path to wkhtmltopdf by
  172. //- first looking in the current dir
  173. //- looking in the PATH and PATHEXT environment dirs
  174. //- using the WKHTMLTOPDF_PATH environment dir
  175. //The path is cached, meaning you can not change the location of wkhtmltopdf in
  176. //a running program once it has been found
  177. func (pdfg *PDFGenerator) findPath() error {
  178. const exe = "wkhtmltopdf"
  179. if binPath != "" {
  180. pdfg.binPath = binPath
  181. return nil
  182. }
  183. exeDir, err := filepath.Abs(filepath.Dir(os.Args[0]))
  184. if err != nil {
  185. return err
  186. }
  187. path, err := exec.LookPath(filepath.Join(exeDir, exe))
  188. if err == nil && path != "" {
  189. binPath = path
  190. pdfg.binPath = path
  191. return nil
  192. }
  193. path, err = exec.LookPath(exe)
  194. if err == nil && path != "" {
  195. binPath = path
  196. pdfg.binPath = path
  197. return nil
  198. }
  199. dir := os.Getenv("WKHTMLTOPDF_PATH")
  200. if dir == "" {
  201. return fmt.Errorf("%s not found", exe)
  202. }
  203. path, err = exec.LookPath(filepath.Join(dir, exe))
  204. if err == nil && path != "" {
  205. binPath = path
  206. pdfg.binPath = path
  207. return nil
  208. }
  209. return fmt.Errorf("%s not found", exe)
  210. }
  211. // Create creates the PDF document and stores it in the internal buffer if no error is returned
  212. func (pdfg *PDFGenerator) Create() error {
  213. return pdfg.run()
  214. }
  215. func (pdfg *PDFGenerator) run() error {
  216. errbuf := &bytes.Buffer{}
  217. cmd := exec.Command(pdfg.binPath, pdfg.Args()...)
  218. cmd.Stdout = &pdfg.outbuf
  219. cmd.Stderr = errbuf
  220. //if there is a pageReader page (from Stdin) we set Stdin to that reader
  221. for _, page := range pdfg.pages {
  222. if page.Reader() != nil {
  223. cmd.Stdin = page.Reader()
  224. break
  225. }
  226. }
  227. err := cmd.Run()
  228. if err != nil {
  229. errStr := errbuf.String()
  230. if strings.TrimSpace(errStr) == "" {
  231. errStr = err.Error()
  232. }
  233. return errors.New(errStr)
  234. }
  235. return nil
  236. }
  237. // NewPDFGenerator returns a new PDFGenerator struct with all options created and
  238. // checks if wkhtmltopdf can be found on the system
  239. func NewPDFGenerator() (*PDFGenerator, error) {
  240. pdfg := &PDFGenerator{
  241. globalOptions: newGlobalOptions(),
  242. outlineOptions: newOutlineOptions(),
  243. Cover: cover{
  244. pageOptions: newPageOptions(),
  245. },
  246. TOC: toc{
  247. allTocOptions: allTocOptions{
  248. tocOptions: newTocOptions(),
  249. pageOptions: newPageOptions(),
  250. },
  251. },
  252. }
  253. err := pdfg.findPath()
  254. return pdfg, err
  255. }