const fs = require('fs')
const asfs = require('asfs')
const del = require('delete')
const express = require('express')
const multer = require('multer')
const childProcess = require('child_process')
const bodyParser = require('body-parser')
const puppeteer = require('puppeteer')
const uuid = require('uuid')
const parseDataUrl = require('data-urls')
const path = require('path')
const sanitize = require('./lib/sanitize')
const expressLogging = require('express-logging')
const log4js = require('log4js')
const getConfig = require('microservice-config')
const bytes = require('bytes')
const {htmlEncode} = require('htmlencode')

// Conversion factor applied to point sizes read from the converted HTML (1.333... = 96 / 72).
const pixelsPerPoint = 1.3333333333333333

// Patterns that pull the first page's width/height (in pt) out of the CSS found in the HTML.
// Neither is global/sticky, so they carry no lastIndex state between requests.
const pageWidthPattern = /\.w0{width:([\d\.]*)pt/
const pageHeightPattern = /\.h0{height:([\d\.]*)pt/

// Defaults handed to microservice-config.
const config = getConfig({
  port: process.env.PORT || 0,
  logLevel: 'info',
  tempPath: 'temp',
  pdf2htmlexPath: `${__dirname}/node_modules/@alancnet/pdf2htmlex/bin-win/pdf2htmlEX.exe`,
  fileSizeLimit: '10mb',
  chromeUserDataDir: path.join(__dirname, 'temp/chrome')
})

const logger = log4js.getLogger('Server')
log4js.getLogger().level = config.logLevel

// Single shared browser instance: wipes any previous user data directory, then launches
// Chrome once at startup. Request handlers await this promise to obtain the browser.
const chromePromise = (async () => {
  if (await asfs.existsAsync(config.chromeUserDataDir)) {
    logger.info('Cleaning Chrome user data directory...')
    await del.promise(config.chromeUserDataDir)
  }
  logger.info('Launching chrome...')
  const chrome = await puppeteer.launch({
    userDataDir: config.chromeUserDataDir
  })
  logger.info('Chrome is running.')
  return chrome
})()

/**
 * Builds a multer instance that stores each upload in config.tempPath as `<uuid>.<ext>`.
 * The generated id is also recorded on the file object as `file.uuid`.
 * @param {string} ext - File extension (without the dot) given to stored uploads.
 */
const upload = ext => multer({
  storage: multer.diskStorage({
    destination: (req, file, cb) => cb(null, config.tempPath),
    filename: (req, file, cb) => {
      file.uuid = uuid()
      cb(null, `${file.uuid}.${ext}`)
    }
  }),
  limits: {
    fileSize: bytes(config.fileSizeLimit)
  }
})

const app = express()

let counter = 0
// Provide a unique id and a logger to each request
app.use((req, res, next) => {
  req.id = counter++
  req.logger = log4js.getLogger(`Request ${req.id}`)
  next()
})

app.use(expressLogging(logger))

/**
 * Adapts a promise-returning handler to an Express middleware.
 *
 * The handler's resolved value is deliberately NOT forwarded to `next`: Express treats any
 * argument passed to `next` as an error, and handlers here return `res` on some paths.
 * `next` is only invoked when the handler finished without responding, and the 500 fallback
 * is only attempted while headers can still be written.
 * @param {Function} fn - `(req, res, next) => Promise`.
 */
const async = (fn) => (req, res, next) => {
  fn(req, res, next)
    .then(() => {
      if (!res.headersSent) next()
    })
    .catch(err => {
      req.logger.error(err)
      // A failure after the response started cannot be reported to the client any more.
      if (res.headersSent) return
      res.status(500).send(`\n${err}\n`)
    })
}

/**
 * Runs a shell command line via child_process.exec.
 * @param {object} logger - Logger used for debug/error/trace output.
 * @param {string} commandLine - Full command line to execute.
 * @returns {Promise<{stdout: string, stderr: string}>} Resolves with the process output;
 *   rejects with the captured stderr when exec reports an error.
 */
const exec = (logger, commandLine) => new Promise((resolve, reject) => {
  logger.debug(`Executing: ${commandLine}`)
  childProcess.exec(commandLine, (error, stdout, stderr) => {
    if (error) {
      logger.error(`Exec error: ${error}\n${stderr}`)
      reject(stderr)
    } else {
      logger.trace(`Exec success`)
      resolve({stdout, stderr})
    }
  })
})

/**
 * Returns the first capture group of `pattern` in `text`, or null when there is no match.
 */
const firstCapture = (pattern, text) => {
  const match = pattern.exec(text)
  return match ? match[1] : null
}

/**
 * Best-effort removal of a temporary file. Failures are logged instead of thrown so that
 * cleanup never hides the error (or the successful response) that preceded it.
 */
const removeTempFile = async (logger, file) => {
  logger.debug(`Deleting ${file}...`)
  try {
    await asfs.unlinkAsync(file)
  } catch (err) {
    if (err && err.code === 'ENOENT') {
      // The step that would have created the file never ran or failed first.
      logger.debug(`${file} does not exist; nothing to delete.`)
    } else {
      logger.warn(`Could not delete ${file}: ${err}`)
    }
  }
}

app.use(bodyParser.urlencoded({extended: false, limit: config.fileSizeLimit}))

app.get('/', (req, res) => res.redirect('/edit'))

app.use(express.static('./public'))

/**
 * Creates the POST handler shared by /edit and /view: takes a PDF (uploaded file or data
 * URL in `req.body.url`), converts it to HTML with pdf2htmlEX and responds with that HTML
 * combined with public/viewer.html. Temporary files are removed whether or not the
 * conversion succeeds.
 * @param {string} mode - 'edit' or 'view'.
 */
// NOTE(review): `mode` and `pageStyle` are not used in the response, and both replace()
// calls search for an empty string. This looks like markup that was lost from the string
// literals - confirm against the intended template before relying on this output.
const viewerCommon = mode => async(async (req, res) => {
  const logger = req.logger
  let pdfFile, htmlFile
  let pdf = null
  if (req.file) {
    logger.debug(`Request includes file: ${req.file.originalname}`)
    pdfFile = req.file.path
    htmlFile = `${config.tempPath}/${req.file.uuid}.html`
  } else if (req.body.url) {
    logger.debug(`Request includes url.`)
    pdf = parseDataUrl(req.body.url)
    if (!pdf) {
      // data-urls yields null for anything that is not a parseable data: URL.
      logger.debug(`Request url could not be parsed as a data URL.`)
      return res.status(400).send('Requires file or url.')
    }
    pdfFile = `${config.tempPath}/${uuid()}.pdf`
    htmlFile = `${pdfFile}.html`
  } else {
    logger.debug(`Request includes no data.`)
    return res.status(400).send('Requires file or url.')
  }

  try {
    if (pdf) {
      logger.debug(`Writing ${pdfFile}...`)
      await asfs.writeFileAsync(pdfFile, pdf.body)
    }

    /*
      Executes pdf2htmlex[.exe] with:
      - 200 horizontal and vertical DPI
      - TrueType Font format (because woff does not render in PhantomJS)
      - No DRM, overriding any PDF settings forbidding copying or modifying
      - The path to the source PDF file
      - The path to the output HTML file
    */
    await exec(logger, `"${config.pdf2htmlexPath}" --hdpi 200 --vdpi 200 --font-format ttf --no-drm 1 "${pdfFile}" "${htmlFile}"`)

    logger.debug(`Reading public/viewer.html...`)
    const editHtml = await asfs.readFileAsync('public/viewer.html')
    logger.debug(`Reading ${htmlFile}...`)
    const html = await asfs.readFileAsync(htmlFile, 'utf8')

    const rawWidth = firstCapture(pageWidthPattern, html)
    const rawHeight = firstCapture(pageHeightPattern, html)
    if (rawWidth === null || rawHeight === null) {
      throw new Error(`Could not determine the page size from ${htmlFile}`)
    }
    const pageWidth = Number(rawWidth)
    const pageHeight = Number(rawHeight)

    const pageStyle = ` html { -webkit-print-color-adjust: exact; } /* Prevent overflow into blank pages */ .pf { max-height: ${pageHeight * pixelsPerPoint / 96 * 0.999}in; overflow-y: hidden; } @page { width: ${pageWidth / 96}in; height: ${pageHeight / 96}in; margin: 0; } `
    logger.trace(`Page style: ${pageStyle}`)

    // Replacer functions are used so that `$&`, `$'`, `$1`... sequences inside the inserted
    // file contents are taken literally instead of being expanded as replacement patterns.
    res.status(200).send(html
      .replace('', () => ` ${editHtml}`)
      .replace('', () => ``)
    )
  } finally {
    await removeTempFile(logger, htmlFile)
    await removeTempFile(logger, pdfFile)
    logger.debug(`Done.`)
  }
})

app.get('/edit', (req, res) => res.sendFile(path.join(__dirname, './public/index.html')))
app.post('/edit', upload('pdf').single('document'), viewerCommon('edit'))

app.get('/view', (req, res) => res.sendFile(path.join(__dirname, './public/index.html')))
app.post('/view', upload('pdf').single('document'), viewerCommon('view'))

/**
 * POST /save: sanitizes the submitted HTML, renders it to PDF with the shared Chrome
 * instance and sends the PDF as a download named `req.body.filename` (default
 * 'document.pdf'). The page and both temporary files are released even when a step fails.
 */
app.post('/save', async(async (req, res) => {
  const logger = req.logger
  if (typeof req.body.html !== 'string') {
    logger.debug(`Request includes no html.`)
    return res.status(400).send('Requires html.')
  }

  const tmpUuid = uuid()
  const htmlFile = `${config.tempPath}/${tmpUuid}.html`
  const htmlUrl = `file://${path.join(process.cwd(), htmlFile)}`
  const pdfFile = `${config.tempPath}/${tmpUuid}.pdf`
  const html = sanitize(req.body.html)
  const filename = req.body.filename || 'document.pdf'
  logger.trace({tmpUuid, htmlFile, htmlUrl, pdfFile})

  // Only used for trace logging here, so a missing size rule must not fail the request.
  const pageWidth = firstCapture(pageWidthPattern, html)
  const pageHeight = firstCapture(pageHeightPattern, html)
  logger.trace({pageWidth, pageHeight})

  try {
    logger.debug(`Writing HTML file, ${html.length} chars to ${htmlFile}...`)
    await asfs.writeFileAsync(htmlFile, html, 'utf8')
    logger.debug(`Getting chrome...`)
    const chrome = await chromePromise
    logger.debug(`Getting new page...`)
    const page = await chrome.newPage()
    try {
      logger.debug(`Navigating to ${htmlUrl}...`)
      await page.goto(htmlUrl)
      logger.debug(`Capturing PDF to ${pdfFile}...`)
      await page.pdf({
        path: pdfFile,
        preferCSSPageSize: true
      })
    } finally {
      // Always release the tab; otherwise every failed render leaks a page in Chrome.
      logger.debug(`Closing page...`)
      await page.close()
    }
    logger.debug(`Sending ${pdfFile} to client...`)
    await new Promise((resolve, reject) => res.download(pdfFile, filename, err => err ? reject(err) : resolve()))
  } finally {
    await removeTempFile(logger, htmlFile)
    await removeTempFile(logger, pdfFile)
    logger.debug(`Done.`)
  }
}))

// Start listening; port 0 (the default) lets the OS pick a free port, which is then logged.
const listener = app.listen(config.port, (err) => {
  if (err) logger.error(err)
  else {
    const address = listener.address()
    logger.info(`Listening on http://127.0.0.1:${address.port}`)
  }
})