Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix pasting MS Word table #55206

Open
wants to merge 4 commits into
base: trunk
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { useRef } from '@wordpress/element';
import { useRefEffect } from '@wordpress/compose';
import {
pasteHandler,
privateApis,
findTransform,
getBlockTransforms,
} from '@wordpress/blocks';
Expand All @@ -16,7 +17,9 @@ import { isURL } from '@wordpress/url';
*/
import { addActiveFormats } from './utils';
import { splitValue } from './split-value';
import { getPasteEventData } from '../../utils/pasting';
import { unlock } from '../../lock-unlock';

const { getClipboardEventData } = unlock( privateApis );

/** @typedef {import('@wordpress/rich-text').RichTextValue} RichTextValue */

Expand All @@ -43,7 +46,7 @@ export function usePasteHandler( props ) {
return;
}

const { plainText, html, files } = getPasteEventData( event );
const { plainText, html, files } = getClipboardEventData( event );

event.preventDefault();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import {
serialize,
pasteHandler,
privateApis,
createBlock,
findTransform,
getBlockTransforms,
Expand All @@ -19,9 +20,11 @@ import { useRefEffect } from '@wordpress/compose';
/**
* Internal dependencies
*/
import { getPasteEventData } from '../../utils/pasting';
import { store as blockEditorStore } from '../../store';
import { useNotifyCopy } from '../../utils/use-notify-copy';
import { unlock } from '../../lock-unlock';

const { getClipboardEventData } = unlock( privateApis );

export default function useClipboardHandler() {
const {
Expand Down Expand Up @@ -153,7 +156,8 @@ export default function useClipboardHandler() {
__experimentalCanUserUseUnfilteredHTML:
canUserUseUnfilteredHTML,
} = getSettings();
const { plainText, html, files } = getPasteEventData( event );
const { plainText, html, files } =
getClipboardEventData( event );
let blocks = [];

if ( files.length ) {
Expand Down
4 changes: 4 additions & 0 deletions packages/blocks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,10 @@ _Returns_

- `Array|string`: A list of blocks or a string, depending on `handlerMode`.

### privateApis

Private @wordpress/blocks APIs.

### rawHandler

Converts an HTML string to known blocks.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,51 +4,15 @@
import { getFilesFromDataTransfer } from '@wordpress/dom';

/**
* Normalizes a given string of HTML to remove the Windows-specific "Fragment"
* comments and any preceding and trailing content.
*
* @param {string} html the html to be normalized
* @return {string} the normalized html
* Internal dependencies
*/
function removeWindowsFragments( html ) {
const startStr = '<!--StartFragment-->';
const startIdx = html.indexOf( startStr );
if ( startIdx > -1 ) {
html = html.substring( startIdx + startStr.length );
} else {
// No point looking for EndFragment
return html;
}

const endStr = '<!--EndFragment-->';
const endIdx = html.indexOf( endStr );
if ( endIdx > -1 ) {
html = html.substring( 0, endIdx );
}
import { deepFilterHTML } from './utils';
import wrapperRemover from './wrapper-remover';
import msFragmentRemover from './ms-fragment-remover';
import metaRemover from './meta-remover';

return html;
}

/**
 * Drops the charset meta tag that Chromium inserts at the very start of
 * copied HTML. Only an exact, leading `<meta charset='utf-8'>` is removed;
 * any other input is returned unchanged.
 * See:
 * - https://github.com/WordPress/gutenberg/issues/33585
 * - https://bugs.chromium.org/p/chromium/issues/detail?id=1264616#c4
 *
 * @param {string} html The HTML to strip the meta tag from.
 * @return {string} The HTML without the leading charset meta tag.
 */
function removeCharsetMetaTag( html ) {
	const charsetMeta = `<meta charset='utf-8'>`;

	// The tag is only stripped when it is the prefix of the string; an
	// occurrence further in is left alone.
	return html.startsWith( charsetMeta )
		? html.slice( charsetMeta.length )
		: html;
}

export function getPasteEventData( { clipboardData } ) {
export function getClipboardEventData( event ) {
const { clipboardData } = event;
let plainText = '';
let html = '';

Expand All @@ -69,11 +33,11 @@ export function getPasteEventData( { clipboardData } ) {
}
}

// Remove Windows-specific metadata appended within copied HTML text.
html = removeWindowsFragments( html );

// Strip meta tag.
html = removeCharsetMetaTag( html );
html = deepFilterHTML( html, [
wrapperRemover,
msFragmentRemover,
metaRemover,
] );

const files = getFilesFromDataTransfer( clipboardData );

Expand Down
13 changes: 13 additions & 0 deletions packages/blocks/src/api/raw-handling/meta-remover.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/**
 * Node filter that removes every `META` node it is given. This covers the
 * charset meta tag inserted by Chromium as well as any other meta tag.
 * See:
 * - https://github.com/WordPress/gutenberg/issues/33585
 * - https://bugs.chromium.org/p/chromium/issues/detail?id=1264616#c4
 *
 * @param {Node} node The node to be processed.
 */
export default function metaRemover( node ) {
	const isMeta = node.nodeName === 'META';

	// Anything that is not a meta node is left untouched.
	if ( ! isMeta ) {
		return;
	}

	node.remove();
}
18 changes: 18 additions & 0 deletions packages/blocks/src/api/raw-handling/ms-fragment-remover.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/**
* WordPress dependencies
*/
import { remove } from '@wordpress/dom';

/**
 * Node filter that removes the Windows-specific "Fragment" marker comments
 * (`<!--StartFragment-->` and `<!--EndFragment-->`). Whitespace around the
 * marker text inside the comment is ignored; every other node is kept.
 *
 * @param {Node} node The node to be processed.
 */
export default function msFragmentRemover( node ) {
	// Only comment nodes can be fragment markers.
	if ( node.nodeType !== node.COMMENT_NODE ) {
		return;
	}

	const marker = node.textContent.trim();

	if ( marker === 'StartFragment' || marker === 'EndFragment' ) {
		remove( node );
	}
}
19 changes: 8 additions & 11 deletions packages/blocks/src/api/raw-handling/paste-handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ import brRemover from './br-remover';
import { deepFilterHTML, isPlain, getBlockContentSchema } from './utils';
import emptyParagraphRemover from './empty-paragraph-remover';
import slackParagraphCorrector from './slack-paragraph-corrector';
import wrapperRemover from './wrapper-remover';
import msFragmentRemover from './ms-fragment-remover';
import metaRemover from './meta-remover';

/**
* Browser dependencies
Expand Down Expand Up @@ -90,17 +93,11 @@ export function pasteHandler( {
tagName,
preserveWhiteSpace,
} ) {
// First of all, strip any meta tags.
HTML = HTML.replace( /<meta[^>]+>/g, '' );
// Strip Windows markers.
HTML = HTML.replace(
/^\s*<html[^>]*>\s*<body[^>]*>(?:\s*<!--\s*StartFragment\s*-->)?/i,
''
);
HTML = HTML.replace(
/(?:<!--\s*EndFragment\s*-->\s*)?<\/body>\s*<\/html>\s*$/i,
''
);
HTML = deepFilterHTML( HTML, [
wrapperRemover,
msFragmentRemover,
metaRemover,
] );

// If we detect block delimiters in HTML, parse entirely as blocks.
if ( mode !== 'INLINE' ) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/**
* Internal dependencies
*/
import { shouldDismissPastedFiles } from '../pasting';
import { shouldDismissPastedFiles } from '../get-clipboard-event-data';

const mocks = {
pngImageFile: { type: 'image/png' },
Expand Down
10 changes: 10 additions & 0 deletions packages/blocks/src/api/raw-handling/wrapper-remover.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/**
* WordPress dependencies
*/
import { unwrap } from '@wordpress/dom';

/**
 * Node filter that unwraps document-level wrapper elements. Any node whose
 * `nodeName` is `HTML` or `BODY` is passed to `unwrap` from
 * `@wordpress/dom`; all other nodes are left as they are.
 *
 * @param {Node} node The node to be processed.
 */
export default function wrapperRemover( node ) {
	const { nodeName } = node;

	if ( nodeName === 'BODY' || nodeName === 'HTML' ) {
		unwrap( node );
	}
}
1 change: 1 addition & 0 deletions packages/blocks/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@
export { store } from './store';
export * from './api';
export * from './deprecated';
export { privateApis } from './private-apis';
13 changes: 13 additions & 0 deletions packages/blocks/src/private-apis.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/**
* Internal dependencies
*/
import { lock } from './lock-unlock';
import { getClipboardEventData } from './api/raw-handling/get-clipboard-event-data';

/**
* Private @wordpress/blocks APIs.
*/
// The exported object is created empty; private members are attached to it
// through `lock` below rather than being exported by name.
export const privateApis = {};
// `getClipboardEventData` is the only member registered here. The block-editor
// paste and clipboard hooks read it back with `unlock( privateApis )`.
lock( privateApis, {
getClipboardEventData,
} );
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ exports[`Blocks raw handling pasteHandler iframe-embed 1`] = `""`;

exports[`Blocks raw handling pasteHandler markdown 1`] = `"This is a heading with <em>italic</em><br>This is a paragraph with a <a href="https://w.org/">link</a>, <strong>bold</strong>, and <del>strikethrough</del>.<br>Preserve<br>line breaks please.<br>Lists<br>A<br>Bulleted Indented<br>List<br>One<br>Two<br>Three<br>Table<br>First Header<br>Second Header<br>Content from cell 1<br>Content from cell 2<br>Content in the first column<br>Content in the second column<br><br><br><br>Table with empty cells.<br>Quote<br>First<br>Second<br>Code<br>Inline <code>code</code> tags should work.<br><code>This is a code block.</code>"`;

exports[`Blocks raw handling pasteHandler ms-excel 1`] = `"<br><br><br>Cell 1<br>Cell 2<br>Cell 3<br>Cell 4"`;

exports[`Blocks raw handling pasteHandler ms-word 1`] = `"This is a title<br>&nbsp;<br>This is a subtitle<br>&nbsp;<br>This is a heading level 1<br>&nbsp;<br>This is a heading level 2<br>&nbsp;<br>This is a <strong>paragraph</strong> with a <a href="https://w.org/">link</a>.<br>&nbsp;<br>A<br>Bulleted<br>Indented<br>List<br>&nbsp;<br>One<br>Two<br>Three<br>&nbsp;<br>One<br>Two<br>Three<br>1<br>2<br>3<br>I<br>II<br>III<br>&nbsp;<br>An image:<br>&nbsp;<br><img width="451" height="338" src="file:LOW-RES.png"><br><a href="#anchor">This is an anchor link</a> that leads to the next paragraph.<br><a id="anchor">This is the paragraph with the anchor.</a><br><a href="#nowhere">This is an anchor link</a> that leads nowhere.<br><a>This is a paragraph with an anchor with no link pointing to it.</a><br>This is a reference to a footnote<a href="#_ftn1" id="_ftnref1">[1]</a>.<br>This is a reference to an endnote<a href="#_edn1" id="_ednref1">[i]</a>.<br><br><br><a href="#_ftnref1" id="_ftn1">[1]</a> This is a footnote.<br><br><br><a href="#_ednref1" id="_edn1">[i]</a> This is an endnote."`;

exports[`Blocks raw handling pasteHandler ms-word-list 1`] = `"<a>This is a headline?</a><br>This is a text:<br>One<br>Two<br>Three<br><a>Lorem Ipsum.</a><br>&nbsp;"`;
Expand Down
1 change: 1 addition & 0 deletions test/integration/blocks-raw-handling.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,7 @@ describe( 'Blocks raw handling', () => {
'google-docs-table-with-rowspan',
'google-docs-table-with-comments',
'google-docs-with-comments',
'ms-excel',
'ms-word',
'ms-word-list',
'ms-word-styled',
Expand Down
96 changes: 96 additions & 0 deletions test/integration/fixtures/documents/ms-excel-in.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
<html xmlns:o="urn:schemas-microsoft-com:office:office"
xmlns:x="urn:schemas-microsoft-com:office:excel"
xmlns="http://www.w3.org/TR/REC-html40">

<head>
<meta http-equiv=Content-Type content="text/html; charset=utf-8">
<meta name=ProgId content=Excel.Sheet>
<meta name=Generator content="Microsoft Excel 15">
<link id=Main-File rel=Main-File
href="file:///C:/Users/user/AppData/Local/Temp/msohtmlclip1/01/clip.htm">
<link rel=File-List
href="file:///C:/Users/user/AppData/Local/Temp/msohtmlclip1/01/clip_filelist.xml">
<style>
<!--table
{mso-displayed-decimal-separator:"\.";
mso-displayed-thousand-separator:"\,";}
@page
{margin:.75in .7in .75in .7in;
mso-header-margin:.3in;
mso-footer-margin:.3in;}
.font5
{color:windowtext;
font-size:6.0pt;
font-weight:400;
font-style:normal;
text-decoration:none;
font-family:游ゴシック, monospace;
mso-font-charset:128;}
tr
{mso-height-source:auto;
mso-ruby-visibility:none;}
col
{mso-width-source:auto;
mso-ruby-visibility:none;}
br
{mso-data-placement:same-cell;}
td
{padding-top:1px;
padding-right:1px;
padding-left:1px;
mso-ignore:padding;
color:black;
font-size:11.0pt;
font-weight:400;
font-style:normal;
text-decoration:none;
font-family:游ゴシック, monospace;
mso-font-charset:128;
mso-number-format:General;
text-align:general;
vertical-align:bottom;
border:none;
mso-background-source:auto;
mso-pattern:auto;
mso-protection:locked visible;
white-space:nowrap;
mso-rotate:0;}
.xl65
{font-family:"Noto Sans JP", sans-serif;
mso-font-charset:128;}
ruby
{ruby-align:left;}
rt
{color:windowtext;
font-size:6.0pt;
font-weight:400;
font-style:normal;
text-decoration:none;
font-family:游ゴシック, monospace;
mso-font-charset:128;
mso-char-type:katakana;
display:none;}
-->
</style>
</head>

<body link="#0563C1" vlink="#954F72">

<table border=0 cellpadding=0 cellspacing=0 width=140 style='border-collapse:
collapse;width:106pt'>
<!--StartFragment-->
<col width=70 span=2 style='width:53pt'>
<tr height=24 style='height:18.0pt'>
<td height=24 class=xl65 width=70 style='height:18.0pt;width:53pt'>Cell 1</td>
<td class=xl65 width=70 style='width:53pt'>Cell 2</td>
</tr>
<tr height=24 style='height:18.0pt'>
<td height=24 class=xl65 style='height:18.0pt'>Cell 3</td>
<td class=xl65>Cell 4</td>
</tr>
<!--EndFragment-->
</table>

</body>

</html>
3 changes: 3 additions & 0 deletions test/integration/fixtures/documents/ms-excel-out.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<!-- wp:table -->
<figure class="wp-block-table"><table><tbody><tr><td>Cell 1</td><td>Cell 2</td></tr><tr><td>Cell 3</td><td>Cell 4</td></tr></tbody></table></figure>
<!-- /wp:table -->
Loading