server.js 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. const fs = require('fs')
  2. const asfs = require('asfs')
  3. const del = require('delete')
  4. const express = require('express')
  5. const multer = require('multer')
  6. const childProcess = require('child_process')
  7. const bodyParser = require('body-parser')
  8. const puppeteer = require('puppeteer')
  9. const uuid = require('uuid')
  10. const parseDataUrl = require('data-urls')
  11. const path = require('path')
  12. const sanitize = require('./lib/sanitize')
  13. const expressLogging = require('express-logging')
  14. const log4js = require('log4js')
  15. const getConfig = require('microservice-config')
  16. const bytes = require('bytes')
  17. const {htmlEncode} = require('htmlencode')
  // CSS pixels per typographic point (96 px per inch / 72 pt per inch).
  const pixelsPerPoint = 1.3333333333333333

  // Default settings handed to getConfig().
  const defaultSettings = {
    // PORT from the environment when set, otherwise 0
    port: process.env.PORT || 0,
    logLevel: 'info',
    // Directory used for uploaded and generated temporary files
    tempPath: 'temp',
    pdf2htmlexPath: `${__dirname}/node_modules/@alancnet/pdf2htmlex/bin-win/pdf2htmlEX.exe`,
    // Applied to both file uploads and url-encoded request bodies
    fileSizeLimit: '10mb',
    chromeUserDataDir: path.join(__dirname, 'temp/chrome')
  }
  const config = getConfig(defaultSettings)

  // Server-wide logger; the default logger's level follows the configured level.
  const logger = log4js.getLogger('Server')
  log4js.getLogger().level = config.logLevel
  /**
   * Launches the shared Chrome instance once, when this module is loaded.
   * A user data directory left over at config.chromeUserDataDir is deleted
   * first. Resolves to the puppeteer browser object.
   */
  const launchChrome = async () => {
    const profileDir = config.chromeUserDataDir
    const hasStaleProfile = await asfs.existsAsync(profileDir)
    if (hasStaleProfile) {
      logger.info('Cleaning Chrome user data directory...')
      await del.promise(profileDir)
    }

    logger.info('Launching chrome...')
    const browser = await puppeteer.launch({userDataDir: profileDir})
    logger.info('Chrome is running.')
    return browser
  }
  const chromePromise = launchChrome()
  /**
   * Builds a multer instance that stores uploads on disk in config.tempPath.
   * Each stored file is named `<uuid>.<ext>`, and the generated uuid is also
   * attached to the file object as `file.uuid`. Upload size is capped at
   * config.fileSizeLimit.
   *
   * @param {string} ext - File extension (without the dot) for stored files.
   */
  const upload = ext => {
    const storage = multer.diskStorage({
      destination (req, file, cb) {
        cb(null, config.tempPath)
      },
      filename (req, file, cb) {
        const id = uuid()
        file.uuid = id
        cb(null, `${id}.${ext}`)
      }
    })
    const limits = {fileSize: bytes(config.fileSizeLimit)}
    return multer({storage, limits})
  }
  const app = express()

  // Incrementing number used as the per-request id
  let counter = 0

  // Tag every request with a unique id and its own named logger
  const tagRequest = (req, res, next) => {
    const id = counter
    counter += 1
    req.id = id
    req.logger = log4js.getLogger(`Request ${id}`)
    next()
  }
  app.use(tagRequest)

  app.use(expressLogging(logger))
  /**
   * Adapts a promise-returning handler to an Express middleware.
   *
   * - When the handler settles without having sent a response, control is
   *   passed on with `next()`.
   * - When the handler already responded, nothing further happens.
   * - When the handler throws or rejects, the error is logged on req.logger
   *   and, if still possible, a 500 response with the HTML-escaped error text
   *   is sent.
   *
   * @param {Function} fn - Handler `(req, res, next)`; normally an async function.
   * @returns {Function} Express middleware `(req, res, next)`.
   */
  const async = (fn) => (req, res, next) => {
    // The Promise executor runs fn synchronously but converts a synchronous
    // throw into a rejection, so non-async handlers are covered too.
    new Promise(resolve => resolve(fn(req, res, next)))
      .then(() => {
        // Never forward the handler's resolved value: Express treats any
        // argument passed to next() as an error. Also do not fall through to
        // later middleware once a response has been sent.
        if (!res.headersSent) next()
      })
      .catch(err => {
        req.logger.error(err)
        // Too late to report a status once headers are out; the log entry
        // above is all that can be done.
        if (res.headersSent) return
        // Error text can echo request-derived data (file names, tool output),
        // so escape it before embedding it in HTML.
        res.status(500).send(`<pre>${htmlEncode(String(err))}</pre>`)
      })
  }
  /**
   * Runs a shell command line and collects its output.
   *
   * @param {object} logger - Logger providing debug(), error() and trace().
   * @param {string} commandLine - Command line passed to child_process.exec.
   * @returns {Promise<{stdout: string, stderr: string}>} Resolves with the
   *   captured output when the command succeeds.
   * @throws {Error} Rejects with an Error whose message is the captured stderr
   *   (or the underlying error message when stderr is empty). The Error also
   *   carries `code`, `stdout` and `stderr`.
   */
  const exec = (logger, commandLine) => new Promise((resolve, reject) => {
    logger.debug(`Executing: ${commandLine}`)
    childProcess.exec(commandLine, (error, stdout, stderr) => {
      if (error) {
        logger.error(`Exec error: ${error}\n${stderr}`)
        // Reject with a real Error rather than a bare (possibly empty) string
        // so callers always get a message and a stack trace.
        const failure = new Error(stderr || error.message)
        failure.code = error.code
        failure.stdout = stdout
        failure.stderr = stderr
        reject(failure)
        return
      }
      logger.trace(`Exec success`)
      resolve({stdout, stderr})
    })
  })
  // Parse url-encoded form bodies, capped at the configured size limit
  const formParser = bodyParser.urlencoded({extended: false, limit: config.fileSizeLimit})
  app.use(formParser)

  // The site root forwards to the editor
  app.get('/', (req, res) => {
    res.redirect('/edit')
  })

  // Serve static assets from ./public
  const staticAssets = express.static('./public')
  app.use(staticAssets)
  /**
   * Reads the `.w0` width and `.h0` height rules (in points) out of the HTML
   * produced by pdf2htmlEX.
   *
   * @param {string} html - Converted document.
   * @returns {{pageWidth: number, pageHeight: number}}
   * @throws {Error} When either rule is missing.
   */
  const readViewerPageSize = html => {
    const widthMatch = /\.w0{width:([\d\.]*)pt/.exec(html)
    const heightMatch = /\.h0{height:([\d\.]*)pt/.exec(html)
    if (!widthMatch || !heightMatch) {
      throw new Error('Converted HTML does not declare page dimensions (.w0 / .h0).')
    }
    return {pageWidth: Number(widthMatch[1]), pageHeight: Number(heightMatch[1])}
  }

  /**
   * Best-effort removal of a temporary file. A file that was never created is
   * ignored; any other failure is logged but not rethrown, so cleanup cannot
   * mask the error that triggered it.
   */
  const removeViewerTempFile = async (logger, file) => {
    logger.debug(`Deleting ${file}...`)
    try {
      await asfs.unlinkAsync(file)
    } catch (err) {
      if (err && err.code === 'ENOENT') return
      logger.warn(`Could not delete ${file}: ${err}`)
    }
  }

  /**
   * Builds the POST handler shared by the edit and view routes.
   *
   * The PDF comes either from an uploaded file (req.file) or from a data URL
   * in req.body.url. It is converted to HTML with pdf2htmlEX, the viewer
   * markup from public/viewer.html plus page-size CSS is injected, and the
   * result is sent to the client. Temporary files are removed afterwards,
   * whether or not the conversion succeeded.
   *
   * @param {string} mode - Viewer mode, exposed to the page in the hidden
   *   `viewerMode` input.
   * @returns {Function} Express middleware.
   */
  const viewerCommon = mode => async(async (req, res) => {
    const logger = req.logger
    const body = req.body || {}
    let pdfFile, htmlFile, filename, pdfBody

    if (req.file) {
      logger.debug(`Request includes file: ${req.file.originalname}`)
      pdfFile = req.file.path
      htmlFile = `${config.tempPath}/${req.file.uuid}.html`
      filename = req.file.originalname
    } else if (body.url) {
      logger.debug(`Request includes url.`)
      // parseDataUrl returns null when the value is not a valid data: URL
      const pdf = parseDataUrl(body.url)
      if (!pdf) {
        logger.debug(`Request url is not a valid data url.`)
        res.status(400).send('Invalid data url.')
        return
      }
      pdfBody = pdf.body
      pdfFile = `${config.tempPath}/${uuid()}.pdf`
      htmlFile = `${pdfFile}.html`
      // There is no uploaded file to take a name from in this branch
      filename = body.filename || 'document.pdf'
    } else {
      logger.debug(`Request includes no data.`)
      res.status(400).send('Requires file or url.')
      return
    }

    try {
      if (pdfBody !== undefined) {
        logger.debug(`Writing ${pdfFile}...`)
        await asfs.writeFileAsync(pdfFile, pdfBody)
      }

      /*
        Executes pdf2htmlex[.exe] with:
        - 200 horizontal and vertical DPI
        - TrueType Font format (because woff does not render in PhantomJS)
        - No DRM, overriding any PDF settings forbidding copying or modifying
        - The path to the source PDF file
        - The path to the output HTML file
      */
      await exec(logger, `"${config.pdf2htmlexPath}" --hdpi 200 --vdpi 200 --font-format ttf --no-drm 1 "${pdfFile}" "${htmlFile}"`)

      logger.debug(`Reading public/viewer.html...`)
      const editHtml = await asfs.readFileAsync('public/viewer.html', 'utf8')
      logger.debug(`Reading ${htmlFile}...`)
      const html = await asfs.readFileAsync(htmlFile, 'utf8')

      const {pageWidth, pageHeight} = readViewerPageSize(html)

      // NOTE(review): `.pf` converts points to inches via pixelsPerPoint / 96,
      // while `@page` divides the point value by 96 directly, and `@page`
      // normally takes a `size` descriptor rather than width/height. Left
      // as-is; confirm the intended output before changing.
      const pageStyle = `
  html {
    -webkit-print-color-adjust: exact;
  }
  /* Prevent overflow into blank pages */
  .pf {
    max-height: ${pageHeight * pixelsPerPoint / 96 * 0.999}in;
    overflow-y: hidden;
  }
  @page {
    width: ${pageWidth / 96}in;
    height: ${pageHeight / 96}in;
    margin: 0;
  }
  `
      logger.trace(`Page style: ${pageStyle}`)

      const bodyInjection = `<input type="hidden" id="viewerMode" value="${htmlEncode(mode)}" />
  <input type="hidden" id="filename" value="${htmlEncode(filename)}" />
  ${editHtml}</body>`
      const headInjection = `<style type="text/css">${pageStyle}</style></head>`

      // Function replacers keep `$&`, `$'` and similar sequences inside the
      // injected markup from being interpreted as replacement patterns.
      const page = html
        .replace('</body>', () => bodyInjection)
        .replace('</head>', () => headInjection)

      res.status(200).send(page)
    } finally {
      await removeViewerTempFile(logger, htmlFile)
      await removeViewerTempFile(logger, pdfFile)
    }
    logger.debug(`Done.`)
  })
  // Both viewer modes serve the same landing page and share the upload
  // pipeline; only the mode handed to viewerCommon differs.
  const indexPage = path.join(__dirname, './public/index.html')
  for (const mode of ['edit', 'view']) {
    app.get(`/${mode}`, (req, res) => res.sendFile(indexPage))
    app.post(`/${mode}`, upload('pdf').single('document'), viewerCommon(mode))
  }
  /**
   * POST /save: renders the submitted (sanitized) HTML to a PDF with Chrome
   * and sends it to the client as a download.
   *
   * Expects `html` (required) and `filename` (optional, defaults to
   * 'document.pdf') in the request body. Responds 400 when `html` is missing
   * or does not carry the `.w0` / `.h0` page dimension rules. The Chrome page
   * and both temporary files are released even when rendering fails.
   */
  app.post('/save', async(async (req, res) => {
    const logger = req.logger
    const body = req.body || {}
    if (typeof body.html !== 'string' || body.html === '') {
      logger.debug(`Request includes no html.`)
      res.status(400).send('Requires html.')
      return
    }

    const tmpUuid = uuid()
    const htmlFile = `${config.tempPath}/${tmpUuid}.html`
    const htmlUrl = `file://${path.join(process.cwd(), htmlFile)}`
    const pdfFile = `${config.tempPath}/${tmpUuid}.pdf`
    const html = sanitize(body.html)
    const filename = body.filename || 'document.pdf'
    logger.trace({tmpUuid, htmlFile, htmlUrl, pdfFile})

    // The dimensions are only logged, but their presence confirms the
    // document carries the page rules written by the converter.
    const widthMatch = /\.w0{width:([\d\.]*)pt/.exec(html)
    const heightMatch = /\.h0{height:([\d\.]*)pt/.exec(html)
    if (!widthMatch || !heightMatch) {
      logger.debug(`Request html has no page dimensions.`)
      res.status(400).send('HTML does not declare page dimensions.')
      return
    }
    const pageWidth = widthMatch[1]
    const pageHeight = heightMatch[1]
    logger.trace({pageWidth, pageHeight})

    // Best-effort delete: a file that was never created is ignored, and other
    // failures are logged without masking the original error.
    const removeQuietly = async file => {
      logger.debug(`Deleting ${file}...`)
      try {
        await asfs.unlinkAsync(file)
      } catch (err) {
        if (err && err.code === 'ENOENT') return
        logger.warn(`Could not delete ${file}: ${err}`)
      }
    }

    let page = null
    try {
      logger.debug(`Writing HTML file, ${html.length} chars to ${htmlFile}...`)
      await asfs.writeFileAsync(htmlFile, html, 'utf8')
      logger.debug(`Getting chrome...`)
      const chrome = await chromePromise
      logger.debug(`Getting new page...`)
      page = await chrome.newPage()
      logger.debug(`Navigating to ${htmlUrl}...`)
      await page.goto(htmlUrl)
      logger.debug(`Capturing PDF to ${pdfFile}...`)
      await page.pdf({
        path: pdfFile,
        preferCSSPageSize: true
      })
      logger.debug(`Closing page...`)
      const finishedPage = page
      // Cleared first so the finally block does not try to close it again
      page = null
      await finishedPage.close()
      logger.debug(`Sending ${pdfFile} to client...`)
      await new Promise((resolve, reject) => res.download(pdfFile, filename, err => err ? reject(err) : resolve()))
    } finally {
      if (page) {
        // Rendering failed part-way: do not leave the tab open in the shared browser
        try {
          await page.close()
        } catch (err) {
          logger.warn(`Could not close page: ${err}`)
        }
      }
      await removeQuietly(htmlFile)
      await removeQuietly(pdfFile)
    }
    logger.debug(`Done.`)
  }))
  // Start the HTTP server and log the port it is listening on.
  const listener = app.listen(config.port, (err) => {
    if (err) {
      logger.error(err)
      return
    }
    const {port} = listener.address()
    logger.info(`Listening on http://127.0.0.1:${port}`)
  })