Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Markers that delimit an HTML comment. Note: the escape sequence '\x3C!--' denotes exactly
// the same runtime string as '<!--', so a single start marker covers both spellings.
const HtmlCommentStart = '<!--';
const HtmlCommentEnd = '-->';

/**
 * @internal
 * Exported only for unit test
 *
 * Strips the HTML comment markers ("<!--" and "-->") found inside <style> elements.
 * Only the markers are removed; the text they enclose is kept, and everything outside
 * of a <style> ... </style> range is returned untouched.
 *
 * A "<style" occurrence counts as an opening tag only when it is directly followed by a
 * non-word character, so tags such as <style111> are ignored. Tag matching is
 * case-insensitive and is performed on the original string, so offsets are never taken
 * from a case-converted copy whose length could differ.
 *
 * @param html The HTML string to clean up
 * @returns The HTML string without comment markers inside its style elements
 */
export function cleanHtmlComments(html: string): string {
    // Created per call: global regexes carry a mutable lastIndex.
    const styleOpenRegex = /<style(?=\W)/gi;
    const styleCloseRegex = /<\/style>/gi;

    let result = '';
    let cursor = 0;
    let open = execFrom(styleOpenRegex, html, cursor);

    while (open) {
        const close = execFrom(styleCloseRegex, html, open.index);

        if (!close) {
            // Unterminated style element: there is no closing tag anywhere after this
            // point, so nothing more can be cleaned. Stop instead of searching again.
            break;
        }

        // Copy the text before the style element verbatim, then the cleaned element
        // (from its opening tag up to, but excluding, the closing tag).
        result +=
            html.substring(cursor, open.index) +
            removeCommentMarkers(html.substring(open.index, close.index));

        // Resume at the closing tag. It can never match the opening pattern, and it is
        // strictly after open.index, so every iteration makes progress.
        cursor = close.index;
        open = execFrom(styleOpenRegex, html, cursor);
    }

    return result + html.substring(cursor);
}

/**
 * Runs a global regex against the text, starting the search at the given index.
 */
function execFrom(regex: RegExp, text: string, fromIndex: number): RegExpExecArray | null {
    regex.lastIndex = fromIndex;
    return regex.exec(text);
}

/**
 * Removes every comment start marker, then every comment end marker, from the text.
 * Each marker is removed in a single left-to-right pass.
 */
function removeCommentMarkers(text: string): string {
    return text.split(HtmlCommentStart).join('').split(HtmlCommentEnd).join('');
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { cleanHtmlComments } from './cleanHtmlComments';
import { cloneModelForPaste, mergePasteContent } from './mergePasteContent';
import { convertInlineCss } from '../createModelFromHtml/convertInlineCss';
import { createPasteFragment } from './createPasteFragment';
Expand Down Expand Up @@ -35,7 +36,11 @@ export function paste(
}

// 1. Prepare variables
const doc = createDOMFromHtml(clipboardData.rawHtml, editor.getDOMCreator());
const domCreator = editor.getDOMCreator();
if (!domCreator.isBypassed && clipboardData.rawHtml) {
clipboardData.rawHtml = cleanHtmlComments(clipboardData.rawHtml);
}
const doc = createDOMFromHtml(clipboardData.rawHtml, domCreator);
const pasteType =
typeof pasteTypeOrGetter == 'function'
? pasteTypeOrGetter(doc, clipboardData)
Expand All @@ -51,7 +56,7 @@ export function paste(
pasteType,
(clipboardData.rawHtml == clipboardData.html
? doc
: createDOMFromHtml(clipboardData.html, editor.getDOMCreator())
: createDOMFromHtml(clipboardData.html, domCreator)
)?.body
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,6 @@ function trustedHTMLHandlerToDOMCreator(trustedHTMLHandler?: LegacyTrustedHTMLHa
const handler = trustedHTMLHandler || defaultTrustHtmlHandler;
return {
htmlToDOM: (html: string) => new DOMParser().parseFromString(handler(html), 'text/html'),
isBypassed: !trustedHTMLHandler,
};
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import { cleanHtmlComments } from '../../../lib/command/paste/cleanHtmlComments';

// Specs for cleanHtmlComments: comment markers inside <style> elements are removed
// (their text is kept), while everything outside of <style> elements is left as is.
describe('cleanHtmlComments', () => {
    it('removes HTML comments within style tags', () => {
        const input = `<head><style>/* Some CSS */<!-- This is a comment -->body { color: red; }<!-- Another comment --></style></head>`;
        const expected =
            '<head><style>/* Some CSS */ This is a comment body { color: red; } Another comment </style></head>';

        expect(cleanHtmlComments(input)).toBe(expected);
    });

    // NOTE: '\x3C' is the escape sequence for '<', so at runtime this input is the same
    // string as in the previous spec.
    it('removes HTML comments within style tags \x3C!--', () => {
        const input = `<head><style>/* Some CSS */\x3C!-- This is a comment -->body { color: red; }<!-- Another comment --></style></head>`;
        const expected =
            '<head><style>/* Some CSS */ This is a comment body { color: red; } Another comment </style></head>';

        expect(cleanHtmlComments(input)).toBe(expected);
    });

    it('does not remove comments outside style tags', () => {
        const input = `<head><!-- This is a comment --><style>body { color: red; }</style><!-- Another comment --></head>`;
        const expected =
            '<head><!-- This is a comment --><style>body { color: red; }</style><!-- Another comment --></head>';
        expect(cleanHtmlComments(input)).toBe(expected);
    });

    it('handles multiple style tags', () => {
        const input = `<head><style><!-- Comment 1 -->body { color: red; }</style><style><!-- Comment 2 -->p { font-size: 16px; }</style></head>`;
        const expected =
            '<head><style> Comment 1 body { color: red; }</style><style> Comment 2 p { font-size: 16px; }</style></head>';

        expect(cleanHtmlComments(input)).toBe(expected);
    });

    it('handles no style tags gracefully', () => {
        // The template literal bodies are intentionally kept flush left so the strings
        // under test stay unchanged.
        const input = `
<head>
<!-- This is a comment -->
</head>
`;
        const expected = `
<head>
<!-- This is a comment -->
</head>
`;
        expect(cleanHtmlComments(input)).toBe(expected);
    });

    // Renamed: this spec and the next one previously shared the title 'handles style tags'.
    it('handles a style tag containing only a comment', () => {
        const input = '<head><style><!-- This is a comment --></style></head>';
        const expected = '<head><style> This is a comment </style></head>';
        expect(cleanHtmlComments(input)).toBe(expected);
    });

    it('handles a style tag containing only a comment (\\x3C escape)', () => {
        const input = '<head><style>\x3C!-- This is a comment --></style></head>';
        const expected = '<head><style> This is a comment </style></head>';
        expect(cleanHtmlComments(input)).toBe(expected);
    });

    it('handle different style tags', () => {
        const input =
            '<head><style111><!--some text--></style111>some other text<style><!--... --></style></head>';
        const expected =
            '<head><style111><!--some text--></style111>some other text<style>... </style></head>';
        expect(cleanHtmlComments(input)).toBe(expected);
    });

    it('handle different style tags in body', () => {
        const input =
            '<body><style111><!--some text--></style111>some other text<style><!--... --></style></body>';
        const expected =
            '<body><style111><!--some text--></style111>some other text<style>... </style></body>';
        expect(cleanHtmlComments(input)).toBe(expected);
    });

    it('handle different style tags and attributes in body', () => {
        const input =
            '<body><style111 style=""><!--some text--></style111>some other text<style style=""><!--... --></style></body>';
        const expected =
            '<body><style111 style=""><!--some text--></style111>some other text<style style="">... </style></body>';
        expect(cleanHtmlComments(input)).toBe(expected);
    });

    it('handles empty input', () => {
        const input = '';
        const expected = '';
        expect(cleanHtmlComments(input)).toBe(expected);
    });
});
Original file line number Diff line number Diff line change
Expand Up @@ -1397,7 +1397,6 @@ describe('pruneUnselectedModel', () => {
doc.blocks.push(para);

pruneUnselectedModel(doc);
navigator.clipboard.writeText(JSON.stringify(doc));
expect(doc).toEqual({
blockGroupType: 'Document',
blocks: [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ export class PastePlugin implements EditorPlugin {

switch (pasteSource) {
case 'wordDesktop':
processPastedContentFromWordDesktop(event, this.editor.getDOMCreator());
processPastedContentFromWordDesktop(event);
break;
case 'wacComponents':
processPastedContentWacComponents(event);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,28 @@
import { getObjectKeys } from 'roosterjs-content-model-dom';
import type { WordMetadata } from './WordMetadata';
import type { BeforePasteEvent, DOMCreator } from 'roosterjs-content-model-types';
import type { BeforePasteEvent } from 'roosterjs-content-model-types';

const FORMATING_REGEX = /[\n\t'{}"]+/g;
/**
 * Collects the trimmed text content of every <style> element found in an HTML string,
 * in document order, without building a DOM.
 *
 * An opening tag is "<style>" or "<style" followed by whitespace and attributes
 * (for example <style type="text/css">); look-alike tags such as <style111> are not
 * matched. Matching is case-insensitive and runs on the original string. Scanning stops
 * at the first opening tag that has no closing "</style>" after it.
 *
 * @param htmlContent The HTML string to scan
 * @returns The trimmed content of each style element; empty when there is none
 */
function extractStyleTagsFromHtml(htmlContent: string): string[] {
    // Created per call: global regexes carry a mutable lastIndex.
    // Attributes may not contain '<' or '>', which keeps every match attempt bounded by
    // the next tag boundary.
    const styleOpenRegex = /<style(?:\s[^<>]*)?>/gi;
    const styleCloseRegex = /<\/style>/gi;
    const styles: string[] = [];

    while (styleOpenRegex.exec(htmlContent)) {
        // After a successful match, lastIndex is the offset right after the opening tag.
        const contentStart = styleOpenRegex.lastIndex;

        styleCloseRegex.lastIndex = contentStart;
        const close = styleCloseRegex.exec(htmlContent);

        if (!close) {
            break;
        }

        styles.push(htmlContent.substring(contentStart, close.index).trim());

        // Continue looking for the next opening tag from the closing tag onwards.
        styleOpenRegex.lastIndex = close.index;
    }

    return styles;
}

/**
* @internal
Expand All @@ -24,14 +44,11 @@ const FORMATING_REGEX = /[\n\t'{}"]+/g;
* 5. Save data in record and only use the required information.
*
*/
export function getStyleMetadata(ev: BeforePasteEvent, domCreator: DOMCreator) {
export function getStyleMetadata(ev: BeforePasteEvent) {
const metadataMap: Map<string, WordMetadata> = new Map();
const doc = domCreator.htmlToDOM(ev.htmlBefore);
const styles = doc.querySelectorAll('style');

styles.forEach(style => {
const text = style?.innerHTML.trim() || '';
const headStyles = extractStyleTagsFromHtml(ev.htmlBefore);

headStyles.forEach(text => {
let index = 0;
while (index >= 0) {
const indexAt = text.indexOf('@', index + 1);
Expand Down Expand Up @@ -77,5 +94,6 @@ export function getStyleMetadata(ev: BeforePasteEvent, domCreator: DOMCreator) {
}
}
});

return metadataMap;
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import type {
ContentModelBlockFormat,
ContentModelListItemLevelFormat,
ContentModelTableFormat,
DOMCreator,
DomToModelContext,
ElementProcessor,
FormatParser,
Expand All @@ -26,8 +25,8 @@ const DEFAULT_BROWSER_LINE_HEIGHT_PERCENTAGE = 1.2;
* Handles Pasted content when source is Word Desktop
* @param ev BeforePasteEvent
*/
export function processPastedContentFromWordDesktop(ev: BeforePasteEvent, domCreator: DOMCreator) {
const metadataMap: Map<string, WordMetadata> = getStyleMetadata(ev, domCreator);
export function processPastedContentFromWordDesktop(ev: BeforePasteEvent) {
const metadataMap: Map<string, WordMetadata> = getStyleMetadata(ev);

setProcessor(ev.domToModelOption, 'element', wordDesktopElementProcessor(metadataMap));
addParser(ev.domToModelOption, 'block', adjustPercentileLineHeight);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
import { BeforePasteEvent } from 'roosterjs-content-model-types';
import { getStyleMetadata } from '../../../lib/paste/WordDesktop/getStyleMetadata';

const domCreator = {
htmlToDOM: (html: string) => new DOMParser().parseFromString(html, 'text/html'),
};

describe('getStyleMetadata', () => {
it('Extract metadata from style element', () => {
const event = <BeforePasteEvent>(<any>{
htmlBefore:
'<style><!--/*FontDefinitions*/@font-face{font-family:"MSMincho";panose-1:2269425834;mso-font-alt:"MS明朝";mso-font-charset:128;mso-generic-font-family:modern;mso-font-pitch:fixed;mso-font-signature:-536870145179149157913421774601312310;}@font-face{font-family:"CambriaMath";panose-1:2453546324;mso-font-charset:0;mso-generic-font-family:roman;mso-font-pitch:variable;mso-font-signature:-53686912111073057273355443204150;}@font-face{font-family:Aptos;mso-font-charset:0;mso-generic-font-family:swiss;mso-font-pitch:variable;mso-font-signature:5368715593004150;}@font-face{font-family:"@MSMincho";panose-1:2269425834;mso-font-charset:128;mso-generic-font-family:modern;mso-font-pitch:fixed;mso-font-signature:-536870145179149157913421774601312310;}/*StyleDefinitions*/p.MsoNormal,li.MsoNormal,div.MsoNormal{mso-style-unhide:no;mso-style-qformat:yes;mso-style-parent:"";margin-top:0in;margin-right:0in;margin-bottom:8.0pt;margin-left:0in;line-height:116%;mso-pagination:widow-orphan;font-size:12.0pt;font-family:"Aptos",sans-serif;mso-ascii-font-family:Aptos;mso-ascii-theme-font:minor-latin;mso-fareast-font-family:"MSMincho";mso-fareast-theme-font:minor-fareast;mso-hansi-font-family:Aptos;mso-hansi-theme-font:minor-latin;mso-bidi-font-family:Arial;mso-bidi-theme-font:minor-bidi;}p.MsoListParagraph,li.MsoListParagraph,div.MsoListParagraph{mso-style-priority:34;mso-style-unhide:no;mso-style-qformat:yes;margin-top:0in;margin-right:0in;margin-bottom:8.0pt;margin-left:.5in;mso-add-space:auto;line-height:116%;mso-pagination:widow-orphan;font-size:12.0pt;font-family:"Aptos",sans-serif;mso-ascii-font-family:Aptos;mso-ascii-theme-font:minor-latin;mso-fareast-font-family:"MSMincho";mso-fareast-theme-font:minor-fareast;mso-hansi-font-family:Aptos;mso-hansi-theme-font:minor-latin;mso-bidi-font-family:Arial;mso-bidi-theme-font:minor-bidi;}p.MsoListParagraphCxSpFirst,li.MsoListParagraphCxSpFirst,div.MsoListParagraphCxSpFirst{mso-style-priority:34;mso-style-unhide:no;mso-style-qformat:yes;mso-style-t
ype:export-only;margin-top:0in;margin-right:0in;margin-bottom:0in;margin-left:.5in;mso-add-space:auto;line-height:116%;mso-pagination:widow-orphan;font-size:12.0pt;font-family:"Aptos",sans-serif;mso-ascii-font-family:Aptos;mso-ascii-theme-font:minor-latin;mso-fareast-font-family:"MSMincho";mso-fareast-theme-font:minor-fareast;mso-hansi-font-family:Aptos;mso-hansi-theme-font:minor-latin;mso-bidi-font-family:Arial;mso-bidi-theme-font:minor-bidi;}p.MsoListParagraphCxSpMiddle,li.MsoListParagraphCxSpMiddle,div.MsoListParagraphCxSpMiddle{mso-style-priority:34;mso-style-unhide:no;mso-style-qformat:yes;mso-style-type:export-only;margin-top:0in;margin-right:0in;margin-bottom:0in;margin-left:.5in;mso-add-space:auto;line-height:116%;mso-pagination:widow-orphan;font-size:12.0pt;font-family:"Aptos",sans-serif;mso-ascii-font-family:Aptos;mso-ascii-theme-font:minor-latin;mso-fareast-font-family:"MSMincho";mso-fareast-theme-font:minor-fareast;mso-hansi-font-family:Aptos;mso-hansi-theme-font:minor-latin;mso-bidi-font-family:Arial;mso-bidi-theme-font:minor-bidi;}p.MsoListParagraphCxSpLast,li.MsoListParagraphCxSpLast,div.MsoListParagraphCxSpLast{mso-style-priority:34;mso-style-unhide:no;mso-style-qformat:yes;mso-style-type:export-only;margin-top:0in;margin-right:0in;margin-bottom:8.0pt;margin-left:.5in;mso-add-space:auto;line-height:116%;mso-pagination:widow-orphan;font-size:12.0pt;font-family:"Aptos",sans-serif;mso-ascii-font-family:Aptos;mso-ascii-theme-font:minor-latin;mso-fareast-font-family:"MSMincho";mso-fareast-theme-font:minor-fareast;mso-hansi-font-family:Aptos;mso-hansi-theme-font:minor-latin;mso-bidi-font-family:Arial;mso-bidi-theme-font:minor-bidi;}.MsoChpDefault{mso-style-type:export-only;mso-default-props:yes;font-family:"Aptos",sans-serif;mso-ascii-font-family:Aptos;mso-ascii-theme-font:minor-latin;mso-fareast-font-family:"MSMincho";mso-fareast-theme-font:minor-fareast;mso-hansi-font-family:Aptos;mso-hansi-theme-font:minor-latin;mso-bidi-font-family:Arial;mso-bidi-theme
-font:minor-bidi;mso-font-kerning:0pt;mso-ligatures:none;}.MsoPapDefault{mso-style-type:export-only;margin-bottom:8.0pt;line-height:116%;}@pageWordSection1{size:8.5in11.0in;margin:1.0in1.0in1.0in1.0in;mso-header-margin:.5in;mso-footer-margin:.5in;mso-paper-source:0;}div.WordSection1{page:WordSection1;}/*ListDefinitions*/@listl0{mso-list-id:1153643359;mso-list-type:hybrid;mso-list-template-ids:133990276544500572-1-1-1-1-1-1-1-1;}@listl0:level1{mso-level-number-format:roman-upper;mso-level-text:"%1)";mso-level-tab-stop:none;mso-level-number-position:right;text-indent:-.25in;}@listl0:level2{mso-level-number-format:alpha-lower;mso-level-tab-stop:none;mso-level-number-position:left;text-indent:-.25in;}@listl0:level3{mso-level-number-format:roman-lower;mso-level-tab-stop:none;mso-level-number-position:right;text-indent:-9.0pt;}@listl0:level4{mso-level-tab-stop:none;mso-level-number-position:left;text-indent:-.25in;}@listl0:level5{mso-level-number-format:alpha-lower;mso-level-tab-stop:none;mso-level-number-position:left;text-indent:-.25in;}@listl0:level6{mso-level-number-format:roman-lower;mso-level-tab-stop:none;mso-level-number-position:right;text-indent:-9.0pt;}@listl0:level7{mso-level-tab-stop:none;mso-level-number-position:left;text-indent:-.25in;}@listl0:level8{mso-level-number-format:alpha-lower;mso-level-tab-stop:none;mso-level-number-position:left;text-indent:-.25in;}@listl0:level9{mso-level-number-format:roman-lower;mso-level-tab-stop:none;mso-level-number-position:right;text-indent:-9.0pt;}ol{margin-bottom:0in;}ul{margin-bottom:0in;}--></style>',
});
const result = getStyleMetadata(event, domCreator);
const result = getStyleMetadata(event);

expect(result.get('l0:level1')).toEqual({
'mso-level-number-format': 'roman-upper',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,6 @@ import {
moveChildNodes,
} from 'roosterjs-content-model-dom';

const domCreator = {
htmlToDOM: (html: string) => new DOMParser().parseFromString(html, 'text/html'),
};

describe('processPastedContentFromWordDesktopTest', () => {
let div: HTMLElement;
let fragment: DocumentFragment;
Expand All @@ -31,7 +27,7 @@ describe('processPastedContentFromWordDesktopTest', () => {
moveChildNodes(fragment, div);
}
const event = createBeforePasteEventMock(fragment, htmlBefore);
processPastedContentFromWordDesktop(event, domCreator);
processPastedContentFromWordDesktop(event);

const model = domToContentModel(
fragment,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,16 @@ export type LegacyTrustedHTMLHandler = (html: string) => string;
* A handler type to convert HTML string to a DOM object
*/
export interface DOMCreator {
/**
* Callback to convert an HTML string to a DOM object (a Document)
*/
htmlToDOM: (html: string) => Document;

/**
* Optional flag indicating that this creator is bypassed, i.e. converting an HTML string to a
* DOM object needs no extra conversion of the HTML.
* The creator built from a legacy trusted HTML handler sets it to true when no handler was supplied.
* During paste, HTML comment markers inside style elements are cleaned from the raw clipboard HTML
* only when this flag is not true.
*/
isBypassed?: boolean;
}

/**
Expand Down
10 changes: 5 additions & 5 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -658,7 +658,7 @@

"@types/dompurify@2.2.3":
version "2.2.3"
resolved "https://registry.yarnpkg.com/@types/dompurify/-/dompurify-2.2.3.tgz#6e89677a07902ac1b6821c345f34bd85da239b08"
resolved "https://registry.npmjs.org/@types/dompurify/-/dompurify-2.2.3.tgz#6e89677a07902ac1b6821c345f34bd85da239b08"
integrity sha512-CLtc2mZK8+axmrz1JqtpklO/Kvn38arGc8o1l3UVopZaXXuer9ONdZwJ/9f226GrhRLtUmLr9WrvZsRSNpS8og==
dependencies:
"@types/trusted-types" "*"
Expand Down Expand Up @@ -773,9 +773,9 @@
integrity sha512-OxepLK9EuNEIPxWNME+C6WwbRAOOI2o2BaQEGzz5Lu2e4Z5eDnEo+/aVEDMIXywoJitJ7xWd641wrGLZdtwRyw==

"@types/trusted-types@*":
version "2.0.2"
resolved "https://registry.yarnpkg.com/@types/trusted-types/-/trusted-types-2.0.2.tgz#fc25ad9943bcac11cceb8168db4f275e0e72e756"
integrity sha512-F5DIZ36YVLE+PN+Zwws4kJogq47hNgX3Nx6WyDJ3kcplxyke3XIzB8uK5n/Lpm1HBsbGzd6nmGehL8cPekP+Tg==
version "2.0.7"
resolved "https://registry.npmjs.org/@types/trusted-types/-/trusted-types-2.0.7.tgz#baccb07a970b91707df3a3e8ba6896c57ead2d11"
integrity sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw==

"@types/yargs-parser@*":
version "21.0.3"
Expand Down Expand Up @@ -2268,7 +2268,7 @@ dom-serialize@^2.2.1:

dompurify@2.5.4:
version "2.5.4"
resolved "https://registry.yarnpkg.com/dompurify/-/dompurify-2.5.4.tgz#347e91070963b22db31c7c8d0ce9a0a2c3c08746"
resolved "https://registry.npmjs.org/dompurify/-/dompurify-2.5.4.tgz#347e91070963b22db31c7c8d0ce9a0a2c3c08746"
integrity sha512-l5NNozANzaLPPe0XaAwvg3uZcHtDBnziX/HjsY1UcDj1MxTK8Dd0Kv096jyPK5HRzs/XM5IMj20dW8Fk+HnbUA==

ecc-jsbn@~0.1.1:
Expand Down
Loading