Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion packages/super-editor/src/core/super-converter/exporter.js
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,7 @@ export class DocxExporter {
#replaceSpecialCharacters(text) {
if (text === undefined || text === null) return text;
return String(text)
.replace(/&(?!#\d+;|#x[0-9a-fA-F]+;|(?:amp|lt|gt|quot|apos);)/g, '&amp;')
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;')
Expand Down Expand Up @@ -613,6 +613,14 @@ export class DocxExporter {
let { name } = node;
const { elements, attributes } = node;

// Normalize w:delInstrText → w:instrText. During import, w:del wrappers around
// field character runs lose their trackDelete marks (only text content gets marked),
// so on export the w:del wrapper is absent. Per ECMA-376 §17.16.13, w:delInstrText
// outside w:del is non-conformant — renaming to w:instrText keeps the field valid.
if (name === 'w:delInstrText') {
name = 'w:instrText';
}

let tag = `<${name}`;

for (let attr in attributes) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,6 @@ export const mergeRelationshipElements = (existingRelationships = [], newRelatio

const attributes = rel.attributes;
const currentId = attributes.Id || '';
attributes.Target = attributes?.Target?.replace(/&/g, '&amp;');

const existingTarget = existingRelationships.find((el) => el.attributes.Target === attributes.Target);
// Images added in collaboration mode may miss relations but have an ID.
const isNewHyperlink = attributes.Type === HYPERLINK_RELATIONSHIP_TYPE && currentId.length > 6;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,8 @@ describe('mergeRelationshipElements', () => {
expect(matches).toHaveLength(1);
});

it('treats escaped and unescaped ampersands in Targets as duplicates', () => {
const existing = [rel('rId1', 'http://schemas.../image', 'media/company&amp;logo.png')];
it('deduplicates targets that contain ampersands', () => {
const existing = [rel('rId1', 'http://schemas.../image', 'media/company&logo.png')];
const toAdd = [rel('rId2', 'http://schemas.../image', 'media/company&logo.png')];
const merged = mergeRelationshipElements(existing, toAdd);
const matches = merged.filter((r) => r.attributes.Target.includes('company'));
Expand Down
104 changes: 102 additions & 2 deletions packages/super-editor/src/tests/export/docxExporter.test.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { describe, it, expect } from 'vitest';
import { DocxExporter } from '@core/super-converter/exporter.js';
import { mergeRelationshipElements } from '@core/super-converter/relationship-helpers.js';

describe('DocxExporter', () => {
// Helper to create a minimal converter stub
Expand Down Expand Up @@ -40,7 +41,7 @@ describe('DocxExporter', () => {
expect(xml).toContain('Format=&lt;&lt;NUM&gt;&gt;_&lt;&lt;VER&gt;&gt;');
});

it('does not double-escape ampersands in text nodes', () => {
it('encodes all ampersands in text nodes including entity-like sequences', () => {
const exporter = new DocxExporter(createConverterStub());

const data = {
Expand All @@ -52,6 +53,8 @@ describe('DocxExporter', () => {
elements: [
{
type: 'text',
// After XML parsing, &amp;amp; becomes &amp; and &amp; becomes &
// Both must be re-encoded on export
text: 'Rock & Roll &amp; Jazz',
},
],
Expand All @@ -61,7 +64,70 @@ describe('DocxExporter', () => {

const xml = exporter.schemaToXml(data);

expect(xml).toContain('Rock &amp; Roll &amp; Jazz');
// Bare & encodes to &amp;, and &amp; (literal text from decoded XML) encodes to &amp;amp;
expect(xml).toContain('Rock &amp; Roll &amp;amp; Jazz');
});

it('preserves distinct style names with entity-like characters in attributes', () => {
  const exporter = new DocxExporter(createConverterStub());

  // Builds one w:style element whose w:name child carries the given display name.
  const makeStyle = (styleId, displayName) => ({
    name: 'w:style',
    attributes: { 'w:styleId': styleId },
    elements: [{ name: 'w:name', attributes: { 'w:val': displayName } }],
  });

  // Stand-ins for two styles as xml-js would have decoded them from the source DOCX:
  //   w:val="Body First Line .5&quot;"     → Body First Line .5"
  //   w:val="Body First Line .5&amp;quot;" → Body First Line .5&quot;
  const stylesDoc = {
    name: 'w:styles',
    attributes: {},
    elements: [
      makeStyle('BodyFirstLine5', 'Body First Line .5"'),
      makeStyle('BodyFirstLine5quot', 'Body First Line .5&quot;'),
    ],
  };

  const output = exporter.schemaToXml(stylesDoc);

  // Both names have to show up, each in its own escaped form, so they stay distinct.
  const expectedAttrs = ['w:val="Body First Line .5&quot;"', 'w:val="Body First Line .5&amp;quot;"'];
  for (const expectedAttr of expectedAttrs) {
    expect(output).toContain(expectedAttr);
  }
});

it('does not double-escape pre-escaped relationship targets in attributes', () => {
  const exporter = new DocxExporter(createConverterStub());

  // External hyperlink whose Target contains a raw ampersand in its query string.
  const hyperlinkRel = {
    type: 'element',
    name: 'Relationship',
    attributes: {
      Id: 'rId1',
      Type: 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink',
      Target: 'https://example.com/page?x=1&y=2',
      TargetMode: 'External',
    },
  };

  // Merge into an empty relationship list, then serialise the result.
  const mergedRels = mergeRelationshipElements([], [hyperlinkRel]);
  const relsDoc = {
    name: 'Relationships',
    attributes: {
      xmlns: 'http://schemas.openxmlformats.org/package/2006/relationships',
    },
    elements: mergedRels,
  };

  const output = exporter.schemaToXml(relsDoc);

  // The ampersand must be escaped once and only once.
  expect(output).toContain('Target="https://example.com/page?x=1&amp;y=2"');
  expect(output).not.toContain('&amp;amp;');
});

Expand Down Expand Up @@ -379,6 +445,40 @@ describe('DocxExporter', () => {
expect(xml).toContain('FIELD[[sdspace]]INSTRUCTION');
});

it('normalizes w:delInstrText to w:instrText when not inside w:del', () => {
  const exporter = new DocxExporter(createConverterStub());

  // Build the tree bottom-up: field code text → w:delInstrText → w:r → w:document.
  // Note that the run sits directly under the document, with no w:del wrapper.
  const fieldCode = { type: 'text', text: ' REF _Ref258418237 \\h ' };
  const delInstr = {
    name: 'w:delInstrText',
    attributes: { 'xml:space': 'preserve' },
    elements: [fieldCode],
  };
  const run = { name: 'w:r', attributes: {}, elements: [delInstr] };
  const documentNode = { name: 'w:document', attributes: {}, elements: [run] };

  const output = exporter.schemaToXml(documentNode);

  // Per ECMA-376 §17.16.13 the element has to come out as w:instrText,
  // with the field instruction itself left intact.
  expect(output).toContain('<w:instrText');
  expect(output).not.toContain('w:delInstrText');
  expect(output).toContain('REF _Ref258418237');
});

it('handles special characters along with [[sdspace]] placeholders', () => {
const exporter = new DocxExporter(createConverterStub());

Expand Down
Loading