| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232 |
- const fs = require('fs')
- const asfs = require('asfs')
- const del = require('delete')
- const express = require('express')
- const multer = require('multer')
- const childProcess = require('child_process')
- const bodyParser = require('body-parser')
- const puppeteer = require('puppeteer')
- const uuid = require('uuid')
- const parseDataUrl = require('data-urls')
- const path = require('path')
- const sanitize = require('./lib/sanitize')
- const expressLogging = require('express-logging')
- const log4js = require('log4js')
- const getConfig = require('microservice-config')
- const bytes = require('bytes')
- const {htmlEncode} = require('htmlencode')
// CSS pixels per typographic point, written as the ratio 96 / 72 (same double as 4 / 3).
const pixelsPerPoint = 96 / 72

// Service settings. The object literal is what gets passed to microservice-config's
// getConfig; how those values are merged with external overrides is decided there.
const config = getConfig({
  // Falls back to 0 when PORT is unset; the bound port is read back from the listener later.
  port: process.env.PORT || 0,
  logLevel: 'info',
  // Directory for uploaded PDFs and intermediate HTML/PDF files.
  tempPath: 'temp',
  pdf2htmlexPath: __dirname + '/node_modules/@alancnet/pdf2htmlex/bin-win/pdf2htmlEX.exe',
  // Used both for multer uploads and for URL-encoded request bodies.
  fileSizeLimit: '10mb',
  chromeUserDataDir: path.join(__dirname, 'temp', 'chrome')
})

// Server-wide logger; the default log4js category takes its level from the config.
const logger = log4js.getLogger('Server')
log4js.getLogger().level = config.logLevel
/**
 * Promise for the shared Chrome instance, created once when this module is loaded.
 * An existing user data directory is deleted first; puppeteer is then launched with
 * that directory as its `userDataDir`.
 */
const chromePromise = (async function launchChrome () {
  const profileDir = config.chromeUserDataDir

  const hasOldProfile = await asfs.existsAsync(profileDir)
  if (hasOldProfile) {
    logger.info('Cleaning Chrome user data directory...')
    await del.promise(profileDir)
  }

  logger.info('Launching chrome...')
  const browser = await puppeteer.launch({ userDataDir: profileDir })
  logger.info('Chrome is running.')
  return browser
}())
/**
 * Builds a multer instance that writes uploads into the temp directory as
 * `<uuid>.<ext>` and passes the configured size limit to multer.
 * The generated id is also stored on the multer file object as `file.uuid`.
 *
 * @param {string} ext File extension (without the dot) for the stored file.
 */
const upload = ext => {
  const storage = multer.diskStorage({
    destination (req, file, cb) {
      cb(null, config.tempPath)
    },
    filename (req, file, cb) {
      // Remember the id so the handler can derive sibling file names from it.
      file.uuid = uuid()
      cb(null, `${file.uuid}.${ext}`)
    }
  })
  const limits = { fileSize: bytes(config.fileSizeLimit) }
  return multer({ storage, limits })
}
const app = express()
let counter = 0

// Tag every request with a sequential id and a log4js logger named after that id.
app.use(function tagRequest (req, res, next) {
  req.id = counter
  counter += 1
  req.logger = log4js.getLogger(`Request ${req.id}`)
  next()
})

// Request logging through the server-wide logger.
app.use(expressLogging(logger))
/**
 * Adapts a promise-returning handler to an Express middleware.
 *
 * - When the handler finishes without having sent a response, control is passed on
 *   with a bare `next()`. The handler's resolved value is never forwarded: Express
 *   treats any argument to `next` as an error, and handlers here resolve with `res`.
 * - When the handler has already responded, nothing further happens.
 * - When the handler throws or rejects, the error is logged on `req.logger` and, if
 *   the response has not been started yet, answered with an HTML-escaped 500 page.
 *
 * @param {Function} fn Handler `(req, res, next)`, normally an async function.
 * @returns {Function} Express middleware `(req, res, next)`.
 */
const async = (fn) => (req, res, next) => {
  // Calling fn inside the executor turns a synchronous throw into a rejection too.
  new Promise(resolve => resolve(fn(req, res, next)))
    .then(() => {
      if (!res.headersSent) next()
    })
    .catch(err => {
      req.logger.error(err)
      // A second response cannot be written once headers are out.
      if (res.headersSent) return
      res.status(500).send(`<pre>${htmlEncode(String(err))}</pre>`)
    })
}
/**
 * Runs a shell command line and collects its output.
 *
 * @param {object} logger Logger providing `debug`, `error` and `trace`.
 * @param {string} commandLine Complete command line, passed to `child_process.exec`.
 * @returns {Promise<{stdout: string, stderr: string}>} Output of a successful run.
 *   On failure the promise rejects with an `Error` whose message is the command's
 *   stderr (or the original error message when stderr is empty) and which carries
 *   `code`, `stdout` and `stderr` properties.
 */
const exec = (logger, commandLine) => new Promise((resolve, reject) => {
  logger.debug(`Executing: ${commandLine}`)
  childProcess.exec(commandLine, (error, stdout, stderr) => {
    if (error) {
      logger.error(`Exec error: ${error}\n${stderr}`)
      // Reject with a real Error (never a bare, possibly empty, string) so callers
      // get a stack trace and can still inspect the exit code and output.
      const failure = new Error(stderr || error.message)
      failure.code = error.code
      failure.stdout = stdout
      failure.stderr = stderr
      reject(failure)
      return
    }
    logger.trace(`Exec success`)
    resolve({stdout, stderr})
  })
})
// Parse URL-encoded form bodies (extended parsing off), capped at the configured size limit.
app.use(bodyParser.urlencoded({
  extended: false,
  limit: config.fileSizeLimit
}))

// The bare root forwards to the editor page.
app.get('/', function redirectToEditor (req, res) {
  res.redirect('/edit')
})

// Files under ./public are served as static content.
app.use(express.static('./public'))
/**
 * Creates the POST handler shared by /edit and /view.
 *
 * The handler accepts a PDF either as an uploaded file (`req.file`) or as a data URL
 * in `req.body.url`, converts it to HTML with pdf2htmlEX, appends the contents of
 * public/viewer.html plus print CSS, and sends the resulting page. Both temp files
 * are removed afterwards, whether or not the conversion succeeded.
 *
 * Responds 400 when neither input is present or when the data URL cannot be parsed.
 *
 * @param {string} mode Value written into the hidden `viewerMode` input.
 * @returns {Function} Express request handler.
 */
const viewerCommon = mode => async(async (req, res) => {
  const logger = req.logger

  // Best-effort removal: a missing file just means an earlier step never produced it.
  const removeTempFile = async (file) => {
    if (!file) return
    logger.debug(`Deleting ${file}...`)
    try {
      await asfs.unlinkAsync(file)
    } catch (err) {
      if (err.code !== 'ENOENT') logger.warn(`Could not delete ${file}: ${err}`)
    }
  }

  // Reads one page dimension (in pt) from the generated stylesheet, failing with a
  // descriptive error instead of a null dereference when the rule is absent.
  const readPointSize = (markup, pattern, label) => {
    const match = pattern.exec(markup)
    if (!match) throw new Error(`Converted HTML has no page ${label} rule.`)
    return match[1] * 1
  }

  let pdfFile, htmlFile, filename
  try {
    if (req.file) {
      logger.debug(`Request includes file: ${req.file.originalname}`)
      pdfFile = req.file.path
      htmlFile = `${config.tempPath}/${req.file.uuid}.html`
      filename = req.file.originalname
    } else if (req.body.url) {
      logger.debug(`Request includes url.`)
      const pdf = parseDataUrl(req.body.url)
      if (!pdf) {
        logger.debug(`Request url is not a valid data url.`)
        res.status(400).send('Invalid data url.')
        return
      }
      pdfFile = `${config.tempPath}/${uuid()}.pdf`
      htmlFile = `${pdfFile}.html`
      // No upload means no original name; use the same fallback as /save.
      filename = req.body.filename || 'document.pdf'
      logger.debug(`Writing ${pdfFile}...`)
      await asfs.writeFileAsync(pdfFile, pdf.body)
    } else {
      logger.debug(`Request includes no data.`)
      res.status(400).send('Requires file or url.')
      return
    }

    /*
      Executes pdf2htmlex[.exe] with:
      - 200 horizontal and vertical DPI
      - TrueType Font format (because woff does not render in PhantomJS)
      - No DRM, overriding any PDF settings forbidding copying or modifying
      - The path to the source PDF file
      - The path to the output HTML file
      NOTE(review): /save renders with puppeteer/Chrome, not PhantomJS - confirm the
      ttf requirement still applies.
    */
    await exec(logger, `"${config.pdf2htmlexPath}" --hdpi 200 --vdpi 200 --font-format ttf --no-drm 1 "${pdfFile}" "${htmlFile}"`)

    logger.debug(`Reading public/viewer.html...`)
    const editHtml = await asfs.readFileAsync('public/viewer.html')
    logger.debug(`Reading ${htmlFile}...`)
    const html = await asfs.readFileAsync(htmlFile, 'utf8')

    const pageWidth = readPointSize(html, /\.w0{width:([\d\.]*)pt/, 'width')
    const pageHeight = readPointSize(html, /\.h0{height:([\d\.]*)pt/, 'height')

    // NOTE(review): max-height converts via pixelsPerPoint / 96 while @page divides the
    // pt value by 96 directly - confirm which unit the .w0/.h0 values actually carry.
    const pageStyle = `
      html {
        -webkit-print-color-adjust: exact;
      }
      /* Prevent overflow into blank pages */
      .pf {
        max-height: ${pageHeight * pixelsPerPoint / 96 * 0.999}in;
        overflow-y: hidden;
      }
      @page {
        width: ${pageWidth / 96}in;
        height: ${pageHeight / 96}in;
        margin: 0;
      }
    `
    logger.trace(`Page style: ${pageStyle}`)

    const bodyTail = `<input type="hidden" id="viewerMode" value="${htmlEncode(mode)}" />
<input type="hidden" id="filename" value="${htmlEncode(filename)}" />
${editHtml}</body>`
    const headTail = `<style type="text/css">${pageStyle}</style></head>`

    // Replacer functions keep `$&`, `$'` etc. inside the injected markup literal.
    res.status(200).send(html
      .replace('</body>', () => bodyTail)
      .replace('</head>', () => headTail)
    )
  } finally {
    await removeTempFile(htmlFile)
    await removeTempFile(pdfFile)
    logger.debug(`Done.`)
  }
})
// Both viewer modes get the same pair of routes: GET serves public/index.html,
// POST takes a PDF upload from the `document` form field and runs the shared handler.
for (const mode of ['edit', 'view']) {
  app.get(`/${mode}`, (req, res) => {
    res.sendFile(path.join(__dirname, './public/index.html'))
  })
  app.post(`/${mode}`, upload('pdf').single('document'), viewerCommon(mode))
}
/**
 * POST /save - renders submitted HTML back into a PDF and sends it as a download.
 *
 * Reads `req.body.html` (sanitized before use) and the optional `req.body.filename`
 * (defaults to `document.pdf`). The HTML is written to a temp file, opened in a new
 * page of the shared Chrome instance, printed to a temp PDF and downloaded to the
 * client. The page is closed and both temp files are removed even when a step fails.
 *
 * Responds 400 when no `html` field was submitted.
 */
app.post('/save', async(async (req, res) => {
  const logger = req.logger

  if (typeof req.body.html !== 'string') {
    logger.debug(`Request includes no html.`)
    res.status(400).send('Requires html.')
    return
  }

  // Best-effort removal: a missing file just means an earlier step never produced it.
  const removeTempFile = async (file) => {
    logger.debug(`Deleting ${file}...`)
    try {
      await asfs.unlinkAsync(file)
    } catch (err) {
      if (err.code !== 'ENOENT') logger.warn(`Could not delete ${file}: ${err}`)
    }
  }

  const tmpUuid = uuid()
  const htmlFile = `${config.tempPath}/${tmpUuid}.html`
  const htmlUrl = `file://${path.join(process.cwd(), htmlFile)}`
  const pdfFile = `${config.tempPath}/${tmpUuid}.pdf`
  const html = sanitize(req.body.html)
  const filename = req.body.filename || 'document.pdf'
  logger.trace({tmpUuid, htmlFile, htmlUrl, pdfFile})

  // The page size is only logged, so a missing rule must not abort the request.
  const widthMatch = /\.w0{width:([\d\.]*)pt/.exec(html)
  const heightMatch = /\.h0{height:([\d\.]*)pt/.exec(html)
  const pageWidth = widthMatch ? widthMatch[1] : undefined
  const pageHeight = heightMatch ? heightMatch[1] : undefined
  logger.trace({pageWidth, pageHeight})

  try {
    logger.debug(`Writing HTML file, ${html.length} chars to ${htmlFile}...`)
    await asfs.writeFileAsync(htmlFile, html, 'utf8')

    logger.debug(`Getting chrome...`)
    const chrome = await chromePromise

    logger.debug(`Getting new page...`)
    const page = await chrome.newPage()
    try {
      logger.debug(`Navigating to ${htmlUrl}...`)
      await page.goto(htmlUrl)
      logger.debug(`Capturing PDF to ${pdfFile}...`)
      await page.pdf({
        path: pdfFile,
        preferCSSPageSize: true
      })
    } finally {
      // Always release the page so failed renders do not pile up in Chrome.
      logger.debug(`Closing page...`)
      await page.close()
    }

    logger.debug(`Sending ${pdfFile} to client...`)
    await new Promise((resolve, reject) => res.download(pdfFile, filename, err => err ? reject(err) : resolve()))
  } finally {
    await removeTempFile(htmlFile)
    await removeTempFile(pdfFile)
    logger.debug(`Done.`)
  }
}))
// Start the HTTP server; once the callback fires without an error, log the bound port.
const listener = app.listen(config.port, function onListening (err) {
  if (err) {
    logger.error(err)
    return
  }
  const { port } = listener.address()
  logger.info(`Listening on http://127.0.0.1:${port}`)
})
|