kokizzu · pull · May 3, 2025 · May 3, 2025 · Apr 18, 2025 · May 3, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,7 +2,11 @@
 
 ### Unreleased
 
+### [v0.17.1] - 2025-05-02
+
 - Fix null values in table cells rendering as `[object Object]`
+- Fix further LineWrapper precision issues
+- Optimize standard font handling. Less code, less memory usage
 
 ### [v0.17.0] - 2025-04-12
 

diff --git a/lib/font/afm.js b/lib/font/afm.js
@@ -1,5 +1,3 @@
-import fs from 'fs';
-
 const WIN_ANSI_MAP = {
   402: 131,
   8211: 150,
@@ -105,23 +103,16 @@ udieresis     yacute         thorn          ydieresis\
 `.split(/\s+/);
 
 class AFMFont {
-  static open(filename) {
-    return new AFMFont(fs.readFileSync(filename, 'utf8'));
-  }
-
+  /**
+   * @param {string} contents
+   */
   constructor(contents) {
-    this.contents = contents;
     this.attributes = {};
     this.glyphWidths = {};
     this.boundingBoxes = {};
     this.kernPairs = {};
 
-    this.parse();
-    // todo: remove charWidths since appears to not be used
-    this.charWidths = new Array(256);
-    for (let char = 0; char <= 255; char++) {
-      this.charWidths[char] = this.glyphWidths[characters[char]];
-    }
+    this.parse(contents);
 
     this.bbox = this.attributes['FontBBox'].split(/\s+/).map((e) => +e);
     this.ascender = +(this.attributes['Ascender'] || 0);
@@ -132,9 +123,12 @@ class AFMFont {
       this.bbox[3] - this.bbox[1] - (this.ascender - this.descender);
   }
 
-  parse() {
+  /**
+   * @param {string} contents
+   */
+  parse(contents) {
     let section = '';
-    for (let line of this.contents.split('\n')) {
+    for (let line of contents.split('\n')) {
       var match;
       var a;
       if ((match = line.match(/^Start(\w+)/))) {
@@ -179,6 +173,10 @@ class AFMFont {
     }
   }
 
+  /**
+   * @param {string} text
+   * @returns
+   */
   encodeText(text) {
     const res = [];
     for (let i = 0, len = text.length; i < len; i++) {

diff --git a/lib/image.js b/lib/image.js
@@ -15,8 +15,8 @@ class PDFImage {
     } else if (src instanceof ArrayBuffer) {
       data = Buffer.from(new Uint8Array(src));
     } else {
-      let match;
-      if ((match = /^data:.+?;base64,(.*)$/.exec(src))) {
+      const match = /^data:.+?;base64,(.*)$/.exec(src);
+      if (match) {
         data = Buffer.from(match[1], 'base64');
       } else {
         data = fs.readFileSync(src);

diff --git a/lib/line_wrapper.js b/lib/line_wrapper.js
@@ -85,10 +85,10 @@ class LineWrapper extends EventEmitter {
   }
 
   wordWidth(word) {
-    return (
+    return PDFNumber(
       this.document.widthOfString(word, this) +
-      this.characterSpacing +
-      this.wordSpacing
+        this.characterSpacing +
+        this.wordSpacing,
     );
   }
 

diff --git a/lib/mixins/attachments.js b/lib/mixins/attachments.js
@@ -33,8 +33,8 @@ export default {
     } else if (src instanceof ArrayBuffer) {
       data = Buffer.from(new Uint8Array(src));
     } else {
-      let match;
-      if ((match = /^data:(.*?);base64,(.*)$/.exec(src))) {
+      const match = /^data:(.*?);base64,(.*)$/.exec(src);
+      if (match) {
         if (match[1]) {
           refBody.Subtype = match[1].replace('/', '#2F');
         }

diff --git a/lib/utils.js b/lib/utils.js
@@ -1,8 +1,25 @@
+const fArray = new Float32Array(1);
+const uArray = new Uint32Array(fArray.buffer);
+
 export function PDFNumber(n) {
   // PDF numbers are strictly 32bit
-  // so convert this number to the nearest 32bit number
+  // so convert this number to the closest 32bit number that does not exceed it
   // @see ISO 32000-1 Annex C.2 (real numbers)
-  return Math.fround(n);
+  const rounded = Math.fround(n);
+  if (rounded <= n) return rounded; // fround did not round up: use it as is
+
+  // fround rounded up: store the value so it can be stepped down to the adjacent 32bit float
+  fArray[0] = n;
+
+  // Move the 32-bit representation by one unit through the shared buffer
+  if (n <= 0) {
+    uArray[0] += 1; // negative: a larger bit pattern is a more negative float
+  } else {
+    uArray[0] -= 1; // positive: a smaller bit pattern is a smaller float
+  }
+
+  // Return the float value
+  return fArray[0];
 }
 
 /**

diff --git a/package.json b/package.json
@@ -9,7 +9,7 @@
     "document",
     "vector"
   ],
-  "version": "0.17.0",
+  "version": "0.17.1",
   "homepage": "http://pdfkit.org/",
   "author": {
     "name": "Devon Govett",

diff --git a/tests/unit/helpers.js b/tests/unit/helpers.js
@@ -1,3 +1,24 @@
+/**
+ * @import PDFDocument from '../../lib/document';
+ */
+
+/**
+ * @typedef {object} TextStream
+ * @property {string} text
+ * @property {string} font
+ * @property {number} fontSize
+ *
+ * @typedef {string | Buffer} PDFDataItem
+ * @typedef {Array<PDFDataItem>} PDFData
+ *
+ * @typedef {object} PDFDataObject
+ * @property {PDFDataItem[]} items
+ */
+
+/**
+ * @param {PDFDocument} doc
+ * @return {PDFData}
+ */
 function logData(doc) {
   const loggedData = [];
   const originalMethod = doc._write;
@@ -18,4 +39,83 @@ function joinTokens(...args) {
   return r;
 }
 
-export { logData, joinTokens };
+/**
+ * @description
+ * Groups logged PDF data into objects: the items between a string matching /^\d+\s0\sobj/ and the string 'endobj'.
+ * @param {PDFData} data
+ * @return {Array<PDFDataObject>} one entry per object start, in order; the start/end markers themselves are not stored
+ */
+function getObjects(data) {
+  const objects = [];
+  let currentObject = null; // object being collected, null while outside obj/endobj
+  for (const item of data) {
+    if (item instanceof Buffer) {
+      if (currentObject) {
+        currentObject.items.push(item);
+      }
+    } else if (typeof item === 'string') {
+      if (/^\d+\s0\sobj/.test(item)) {
+        currentObject = { items: [] };
+        objects.push(currentObject);
+      } else if (item === 'endobj') {
+        currentObject = null;
+      } else if (currentObject) {
+        currentObject.items.push(item);
+      }
+    }
+  }
+  return objects;
+}
+
+/**
+ * @param {Buffer} textStream - stream content, decoded here as UTF-8
+ * @return {TextStream | undefined} font and size of the first Tf plus the text of the first TJ array; undefined if either is absent
+ */
+function parseTextStream(textStream) {
+  const decodedStream = textStream.toString('utf8');
+
+  // Extract font and font size from the first `/Name <integer> Tf` operator
+  const fontMatch = decodedStream.match(/\/([A-Za-z0-9]+)\s+(\d+)\s+Tf/);
+
+  if (!fontMatch) {
+    return undefined;
+  }
+
+  const font = fontMatch[1];
+  const fontSize = parseInt(fontMatch[2], 10);
+
+  // Extract the content of the first `[...] TJ` array
+  const tjMatch = decodedStream.match(/\[([^\]]+)\]\s+TJ/);
+  if (!tjMatch) {
+    return undefined;
+  }
+  let text = '';
+
+  // This is a simplified version.
+  // The correct way is to retrieve the encoding from the /Resources /Font dictionary and decode using it
+  // https://stackoverflow.com/a/29468049/5724645
+
+  // Match all hex strings like <...>; anything else in the array is ignored
+  const hexMatches = [...tjMatch[1].matchAll(/<([0-9a-fA-F]+)>/g)];
+  for (const m of hexMatches) {
+    // Convert hex to string, two hex digits per character
+    const hex = m[1];
+    for (let i = 0; i < hex.length; i += 2) {
+      const code = parseInt(hex.substr(i, 2), 16);
+      let char = String.fromCharCode(code);
+      // Handle special cases
+      if (code === 0x0a) {
+        char = '\n'; // Newline
+      } else if (code === 0x0d) {
+        char = '\r'; // Carriage return
+      } else if (code === 0x85) {
+        char = '...'; // NOTE(review): presumably the ellipsis glyph code in the font encoding — confirm
+      }
+      text += char;
+    }
+  }
+
+  return { text, font, fontSize };
+}
+
+export { logData, joinTokens, parseTextStream, getObjects };
diff --git a/tests/unit/setupTests.js b/tests/unit/setupTests.js
@@ -1,5 +1,7 @@
-import matcher from './toContainChunk';
+import toContainChunk from './toContainChunk';
+import toContainText from './toContainText';
 import { toMatchImageSnapshot } from 'jest-image-snapshot';
 
-expect.extend(matcher);
+expect.extend(toContainChunk);
+expect.extend(toContainText);
 expect.extend({ toMatchImageSnapshot });
diff --git a/tests/unit/text.spec.js b/tests/unit/text.spec.js
@@ -15,33 +15,15 @@ describe('Text', () => {
     test('with simple content', () => {
       const docData = logData(document);
 
-      const textStream = Buffer.from(
-        `1 0 0 -1 0 792 cm
-q
-1 0 0 -1 0 792 cm
-BT
-1 0 0 1 72 711.384 Tm
-/F1 12 Tf
-[<73696d706c65207465> 30 <7874> 0] TJ
-ET
-Q
-`,
-        'binary',
-      );
-
       document.text('simple text');
       document.end();
 
-      expect(docData).toContainChunk([
-        `5 0 obj`,
-        `<<
-/Length 116
->>`,
-        `stream`,
-        textStream,
-        `\nendstream`,
-        `endobj`,
-      ]);
+      expect(docData).toContainText({ text: 'simple text' });
+    });
+
+    test('with destination', () => {
+      // just check that there is no exception
+      document.text('simple text', { destination: 'anchor' });
     });
 
     test('with content ending after page right margin', () => {
@@ -194,5 +176,21 @@ Q
         `endobj`,
       ]);
     });
+
+    test('bounded text precision - issue #1611', () => {
+      const docData = logData(document);
+      const text = 'New york';
+      const bounds = document.boundsOfString(text);
+      // Draw text which is constrained to the bounds
+      document.text(text, {
+        ellipsis: true,
+        width: bounds.width,
+        height: bounds.height,
+      });
+
+      document.end();
+
+      expect(docData).toContainText({ text });
+    });
   });
 });