如何把代码批量转换成打印 pdf 文件？

2024/1/22

有时候我们需要把代码转换成 pdf 文件用于打印，特别是用于软件著作权申请。但是搜索了一圈，发现基本上各种网页插件、编辑器插件都只能单独地一个文件一个文件处理。

作为一个程序员，当然不能这样傻傻地一个一个处理代码文件。代码文件数量多、行数长短不一，而且分布在不同的文件夹里。一个一个处理太慢了，效率太低。

今天我们介绍一个把代码批量转换成打印 pdf 文件的小软件，来完成打印代码的任务，而且可以支持代码行号、语法高亮，可以自定义样式，非常灵活。

总体流程

通过 git ls-files 取得需要打印的代码文件路径，生成 filelist.txt 文件，调整源码文件为需要的顺序。
依次读取这些代码文件，通过 highlight.js 生成 html 文件。
使用 puppeteer 依次打开上一步生成的 html 文件，转换成 pdf 文件。
使用 pdf-merger-js 把每个 pdf 文件合并起来生成一份代码 pdf 文件。
使用 pdf-lib 在合并后的 pdf 文件页眉添加标识。

使用的技术栈

cloc
计算源码行数
highlight.js
代码着色生成 html 文件
puppeteer
打开 html 文件，另存为 pdf 文件
pdf-merger-js
合并 pdf 文件
pdf-lib
修改 pdf 文件，添加页眉标识

具体实现

我们使用 NodeJS 开发这个软件。

获取要打印的源码文件

此步骤的常见方法是从 git 管理的文件中获取。

在源码目录运行

计算代码行数

cloc $(git ls-files)

获取 git 仓库的文件清单

git ls-files > filelist.txt

生成 filelist.txt 文件后，打开它，清除不需要的 zip/xlsx/png/docx/.gitignore/cache 等文件。

调整文件的排列顺序，以备后续进行合并操作。

读取代码文件生成 html

此步骤使用 highlight.js 完成。

读取出代码文本后，发送给 highlight.js 处理。

const html = hljs.highlightAuto(codeStr).value;

在 html 文件 <head> 区域添加

html

<link rel="stylesheet" href="/path/to/styles/default.min.css" />
<script src="/path/to/highlight.min.js"></script>
<script>
  hljs.highlightAll();
</script>

把生成的 html 代码插入到一个 html 文件中，左侧是行号，右侧是代码。

/**
 * Builds a standalone HTML page for one source file: a line-number gutter
 * followed by the syntax-highlighted code.
 *
 * @param {string} srcFilePath - Path of the source file to render. It is also
 *   used as the page title and heading.
 * @returns {Promise<string>} The complete HTML document as a string.
 */
async function getHtml(srcFilePath) {
  // The path is interpolated into markup, so HTML metacharacters in a file
  // name (&, <, >, ") must not be allowed to break the page.
  const escapeHtml = (text) =>
    String(text)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;');
  const safePath = escapeHtml(srcFilePath);

  const codeStr = await readContent(srcFilePath);
  const codeHtml = hljs.highlightAuto(codeStr).value;
  const preCode = `<pre><code>${codeHtml}</code></pre>`;

  // A final newline terminates the last line instead of starting a new one,
  // so it must not add an extra number to the gutter.
  const lines = codeStr.split('\n');
  if (lines.length > 1 && lines[lines.length - 1] === '') {
    lines.pop();
  }
  const numberrow = lines.map((_, i) => fixWidth(i + 1)).join('\n');
  const number = `<pre><code class="language-plaintext" style="border-right: 1px solid #EFEFF5;">${numberrow}</code></pre>`;
  const body = `
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>${safePath}</title>
    <link rel="stylesheet" href="../head/tomorrow.min.css">
    <script src="../head/highlight.min.js"></script>
    <script>hljs.highlightAll();</script>
    <link rel="stylesheet" href="../head/style.css">
</head>
<body>
<main>
<h1>${safePath}</h1>
<div class="container">
${number}
${preCode}
</div>
</main>
</body>
</html>
`;
  return body;
}

生成 pdf 文件

使用 puppeteer 读取生成的 html 文件，打印成 pdf 文件。

/**
 * Prints each generated HTML page to a PDF file with headless Chrome.
 *
 * For every entry it opens `html/<name>.html` (resolved against the current
 * working directory) and writes `pdf/<name>.pdf`.
 *
 * @param {string[]} contents - Base names (without extension) of the HTML
 *   files to print.
 * @returns {Promise<void>}
 */
async function writePdf(contents) {
  const browser = await puppeteer.launch({
    executablePath: 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe'
  });
  try {
    for (const content of contents) {
      const page = await browser.newPage();
      try {
        // The page must be loaded from a file URL (not injected as a string)
        // so that its relative stylesheet/script links resolve.
        // pathToFileURL also percent-encodes characters such as spaces or '#'.
        const htmlFile = path.resolve('html', `${content}.html`);
        await page.goto(pathToFileURL(htmlFile).href);
        await page.pdf({
          margin: {
            top: '1cm',
            bottom: '1cm',
            left: '1cm',
            right: '1cm'
          },
          path: path.join('pdf', `${content}.pdf`),
          format: 'a4'
        });
      } finally {
        // Close each tab so that long file lists do not accumulate open pages.
        await page.close();
      }
    }
  } finally {
    // Always shut the browser down, even when a page fails to print.
    await browser.close();
  }
}

INFO

为什么不用 wkhtmltopdf？

起初考虑过使用 wkhtmltopdf，但是经过试用，存在 3 个问题：

wkhtmltopdf 无法支持 flex/grid 布局，无法实现我的需求。
wkhtmltopdf 是命令行文件，和 nodejs 组合较麻烦。
wkhtmltopdf 的仓库目前已经 archive 了。

看看源码文件打印的效果

html-code

合并 pdf 文件

生成了多个 pdf 文件后，使用 pdf-merger-js 合并多个文件生成一个 pdf 文件。

/**
 * Concatenates the per-file PDFs, in the given order, into one document.
 *
 * @param {string[]} contents - Base names (without extension) of the PDFs
 *   inside the `pdf` directory.
 * @param {string} [folderName='code'] - Label used in the output file name.
 * @returns {Promise<string>} Name of the merged PDF file.
 */
async function mergePdfs(contents, folderName = 'code') {
  const pdfMerger = new PDFMerger();
  const outputName = `合并_${folderName}.pdf`;

  // Files are appended one at a time so the page order follows `contents`.
  for (const baseName of contents) {
    const pdfPath = path.join('pdf', `${baseName}.pdf`);
    await pdfMerger.add(pdfPath);
  }

  await pdfMerger.save(outputName);
  return outputName;
}

还可以使用桔子工具合并，一站式制作高质量资料汇编。

一站式制作高质量资料汇编

添加页眉标识

使用 pdf-lib 库在 pdf 文件每一页的页眉添加标识。

/**
 * Stamps a header text onto every page of a PDF and writes the result to a
 * new file.
 *
 * @param {string} filePath - Path of the PDF to stamp.
 * @param {string} header - Text drawn on each page.
 * @param {string} targetName - Label used to build the output file name.
 * @returns {Promise<string>} Path of the written file, as returned by
 *   `tryFileName` (presumably adjusted to avoid clashes — confirm against
 *   its definition).
 */
async function addHeader(filePath, header, targetName) {
  // Font file read from the current working directory; a CJK-capable font is
  // needed because the header may contain Chinese text.
  const fontData = await fs.promises.readFile('simhei.ttf');
  // Load a PDFDocument from the existing PDF bytes
  const pdfDoc = await PDFDocument.load(await fs.promises.readFile(filePath));

  // fontkit has to be registered before a custom font can be embedded
  pdfDoc.registerFontkit(fontkit);
  const headerFont = await pdfDoc.embedFont(fontData);

  // Draw the header on every page, not just the first one
  for (const page of pdfDoc.getPages()) {
    page.drawText(header, {
      // NOTE(review): fixed coordinates, presumably tuned for the A4 pages
      // produced by writePdf — confirm before using other page sizes.
      x: 280,
      y: 810,
      size: 16,
      font: headerFont,
      color: rgb(0.95, 0.1, 0.1)
    });
  }
  // Serialize the PDFDocument to bytes (a Uint8Array)
  const pdfBytes = await pdfDoc.save();
  const target = tryFileName(`加页眉_${targetName}.pdf`);
  await fs.promises.writeFile(target, pdfBytes);
  // Return the final path so the caller can locate the file.
  return target;
}

实现的代码全文

// load the library and ALL languages
const hljs = require('highlight.js');
const fs = require('fs');
const puppeteer = require('puppeteer-core');
const path = require('path');
const { pathToFileURL } = require('url');

const PDFMerger = require('pdf-merger-js');
const { mkdir } = require('fs/promises');

/**
 * Reads a file and resolves with its contents as a string.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {Promise<string>} The file contents. Rejects with the underlying
 *   error (after logging it) when the read fails.
 */
function readContent(filePath) {
  return new Promise((resolve, reject) => {
    const onRead = (error, buffer) => {
      if (error) {
        console.error(error);
        reject(error);
      } else {
        // readFile yields a Buffer here; convert it to text
        resolve(buffer.toString());
      }
    };
    fs.readFile(filePath, onRead);
  });
}

/**
 * Writes text to a file, replacing any existing contents.
 *
 * @param {string} filePath - Path of the file to write.
 * @param {string|Buffer} content - Data to write.
 * @returns {Promise<string>} Resolves with `filePath` once the write has
 *   finished. Rejects with the underlying error (after logging it) on failure.
 */
function writeContent(filePath, content) {
  return new Promise((resolve, reject) => {
    const opt = {
      flag: 'w' // a: append; w: overwrite
    };

    fs.writeFile(filePath, content, opt, (err) => {
      if (err) {
        console.error(err);
        reject(err);
        // Stop here so a failed write never reaches resolve() below.
        return;
      }
      resolve(filePath);
    });
  });
}

/**
 * Right-aligns a line number in a column at least four characters wide.
 *
 * Values wider than four characters are returned in full; cutting them to
 * four characters would print wrong line numbers from line 10000 onwards.
 *
 * @param {number|string} val - The value to format.
 * @returns {string} `val` left-padded with spaces to at least four characters.
 */
function fixWidth(val) {
  return String(val).padStart(4, ' ');
}

/**
 * Builds a standalone HTML page for one source file: a line-number gutter
 * followed by the syntax-highlighted code.
 *
 * @param {string} srcFilePath - Path of the source file to render. It is also
 *   used as the page title and heading.
 * @returns {Promise<string>} The complete HTML document as a string.
 */
async function getHtml(srcFilePath) {
  // The path is interpolated into markup, so HTML metacharacters in a file
  // name (&, <, >, ") must not be allowed to break the page.
  const escapeHtml = (text) =>
    String(text)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;');
  const safePath = escapeHtml(srcFilePath);

  const codeStr = await readContent(srcFilePath);
  const codeHtml = hljs.highlightAuto(codeStr).value;
  const preCode = `<pre><code>${codeHtml}</code></pre>`;

  // A final newline terminates the last line instead of starting a new one,
  // so it must not add an extra number to the gutter.
  const lines = codeStr.split('\n');
  if (lines.length > 1 && lines[lines.length - 1] === '') {
    lines.pop();
  }
  const numberrow = lines.map((_, i) => fixWidth(i + 1)).join('\n');
  const number = `<pre><code class="language-plaintext" style="border-right: 1px solid #EFEFF5;">${numberrow}</code></pre>`;
  const body = `
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>${safePath}</title>
    <link rel="stylesheet" href="../head/tomorrow.min.css">
    <script src="../head/highlight.min.js"></script>
    <script>hljs.highlightAll();</script>
    <link rel="stylesheet" href="../head/style.css">
</head>
<body>
<main>
<h1>${safePath}</h1>
<div class="container">
${number}
${preCode}
</div>

</main>
</body>
</html>
`;
  return body;
}

/**
 * Prints each generated HTML page to a PDF file with headless Chrome.
 *
 * For every entry it opens `html/<name>.html` (resolved against the current
 * working directory) and writes `pdf/<name>.pdf`.
 *
 * @param {string[]} contents - Base names (without extension) of the HTML
 *   files to print.
 * @returns {Promise<void>}
 */
async function writePdf(contents) {
  const browser = await puppeteer.launch({
    executablePath: 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe'
  });
  try {
    for (const content of contents) {
      const page = await browser.newPage();
      try {
        // The page must be loaded from a file URL (not injected as a string)
        // so that its relative stylesheet/script links resolve.
        // pathToFileURL also percent-encodes characters such as spaces or '#'.
        const htmlFile = path.resolve('html', `${content}.html`);
        await page.goto(pathToFileURL(htmlFile).href);

        console.log('pdf', content);

        await page.pdf({
          margin: {
            top: '1cm',
            bottom: '1cm',
            left: '1cm',
            right: '1cm'
          },
          path: path.join('pdf', `${content}.pdf`),
          format: 'a4'
        });
      } finally {
        // Close each tab so that long file lists do not accumulate open pages.
        await page.close();
      }
    }
  } finally {
    // Always shut the browser down, even when a page fails to print.
    await browser.close();
  }
}

/**
 * Concatenates the per-file PDFs, in the given order, into one document.
 *
 * @param {string[]} contents - Base names (without extension) of the PDFs
 *   inside the `pdf` directory.
 * @param {string} [folderName='code'] - Label used in the output file name.
 * @returns {Promise<string>} Name of the merged PDF file.
 */
async function mergePdfs(contents, folderName = 'code') {
  const merger = new PDFMerger();

  // Files are appended one at a time so the page order follows `contents`.
  for (const content of contents) {
    await merger.add(path.join('pdf', `${content}.pdf`));
  }
  const target = `合并_${folderName}.pdf`;
  await merger.save(target);
  // Return the output name so callers can pass it to a later step.
  return target;
}

/**
 * Recursively deletes a directory and everything inside it.
 *
 * Logs each deleted file. When the path does not exist it logs a notice and
 * does nothing.
 *
 * @param {string} url - Path of the directory to remove.
 * @returns {void}
 */
function _deleteDirFiles(url) {
  if (!fs.existsSync(url)) {
    console.log('给定的路径不存在！');
    return;
  }

  // Dirent type checks do not follow symlinks, so a link to a directory is
  // unlinked as a file and the directory it points to is left untouched.
  const entries = fs.readdirSync(url, { withFileTypes: true });
  for (const entry of entries) {
    const curPath = path.join(url, entry.name);
    if (entry.isDirectory()) {
      _deleteDirFiles(curPath);
    } else {
      fs.unlinkSync(curPath);
      // Report the file that was removed, not its parent directory.
      console.log('清除文件', curPath);
    }
  }
  // The directory is empty now and can be removed itself.
  fs.rmdirSync(url);
}

/**
 * Entry point: renders every file listed in `filelist.txt` to HTML, prints
 * each page to PDF and merges the PDFs into one document.
 *
 * Expects exactly one command-line argument, the source folder that the
 * paths in `filelist.txt` are relative to.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Validate the arguments first so a wrong invocation does not wipe the
  // output of a previous run.
  if (process.argv.length !== 3) {
    console.log(`批量打印生成代码

使用方法：
  node index.js <源码文件夹>`);
    return;
  }

  const projectFolderName = process.argv[2];
  console.log('源码文件夹', projectFolderName);

  // Start from empty output directories.
  _deleteDirFiles('html');
  _deleteDirFiles('pdf');

  // Wait for the directories to exist before anything is written into them.
  await mkdir('html', { recursive: true });
  await mkdir('pdf', { recursive: true });

  // The file list is prepared by hand: unwanted entries such as images,
  // archives and irrelevant config files must already be removed from it.
  const files = await readContent('filelist.txt');
  // Accept both CRLF and LF line endings in the list file.
  const filePaths = files.split(/\r?\n/).filter((v) => v);
  const contents = [];

  for (const file of filePaths) {
    const ht = await getHtml(path.join(projectFolderName, file));
    // Flatten the relative path into a single file name.
    const target = file.replace(/\//g, '_');
    await writeContent(path.join('html', `${target}.html`), ht);
    console.log('html', target);

    contents.push(target);
  }
  await writePdf(contents);

  await mergePdfs(contents, path.basename(projectFolderName));

  console.log('Done');
}

main().catch((err) => {
  // Report the failure and exit non-zero instead of leaving an unhandled
  // promise rejection.
  console.error(err);
  process.exitCode = 1;
});

如何把代码批量转换成打印 pdf 文件？ ​

总体流程 ​

使用的技术栈 ​

具体实现 ​

获取要打印的源码文件 ​

读取代码文件生成 html ​

生成 pdf 文件 ​

合并 pdf 文件 ​

添加页眉标识 ​

联系我们