Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions apps/docs/core/superdoc/configuration.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,22 @@ new SuperDoc({
Disable custom context menus
</ParamField>

<ParamField path="onUnsupportedContent" type="function">
Callback invoked with HTML elements that were dropped during import because they have no schema representation. Receives an array of `{ tagName, outerHTML, count }` items. When provided, `console.warn` is suppressed.

```javascript
onUnsupportedContent: (items) => {
items.forEach(({ tagName, count }) => {
console.log(`Dropped ${count}x <${tagName}>`);
});
}
```
</ParamField>

<ParamField path="warnOnUnsupportedContent" type="boolean" default="false">
Log a `console.warn` listing HTML elements dropped during import. Ignored when `onUnsupportedContent` is provided.
</ParamField>

<ParamField path="cspNonce" type="string">
Content Security Policy nonce
</ParamField>
Expand Down
16 changes: 16 additions & 0 deletions apps/docs/core/supereditor/configuration.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,22 @@ const editor = await Editor.open(file, {
Use ProseMirror JSON content instead of DOCX parsing
</ParamField>

<ParamField path="onUnsupportedContent" type="function">
Callback invoked with HTML elements that were dropped during import because they have no schema representation. Receives an array of `{ tagName, outerHTML, count }` items. When provided, `console.warn` is suppressed.

```javascript
onUnsupportedContent: (items) => {
items.forEach(({ tagName, count }) => {
console.log(`Dropped ${count}x <${tagName}>`);
});
}
```
</ParamField>

<ParamField path="warnOnUnsupportedContent" type="boolean" default="false">
Log a `console.warn` listing HTML elements dropped during import. Ignored when `onUnsupportedContent` is provided.
</ParamField>

## Features

<ParamField path="isCommentsEnabled" type="boolean" default="false">
Expand Down
6 changes: 6 additions & 0 deletions apps/docs/core/supereditor/methods.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,12 @@ Insert content with automatic format detection.
<ParamField path="position" type="number | Object">
Insert position (defaults to cursor)
</ParamField>
<ParamField path="onUnsupportedContent" type="function">
Callback for HTML elements dropped during parsing. Receives `{ tagName, outerHTML, count }[]`. Falls back to the editor-level option if not set.
</ParamField>
<ParamField path="warnOnUnsupportedContent" type="boolean" default="false">
Log dropped elements via `console.warn`. Falls back to the editor-level option if not set.
</ParamField>
</Expandable>
</ParamField>

Expand Down
22 changes: 22 additions & 0 deletions packages/super-editor/src/core/Editor.api-contracts.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,28 @@ describe('Editor - API Contracts (Regression Prevention)', () => {
});
});

it('docx markdown initialization forwards unsupported-content callback', async () => {
  const onUnsupportedContent = vi.fn();

  ({ editor } = initTestEditor({
    mode: 'docx',
    content: '<p>Fallback content</p>',
    markdown: '<video src="demo.mp4"></video>',
    onUnsupportedContent,
    useImmediateSetTimeout: false,
  }));

  // Wait briefly before asserting (presumably so deferred editor setup can
  // finish — confirm against initTestEditor). Awaiting the delay and asserting
  // afterwards means a failed expectation rejects this test directly, instead
  // of throwing inside a timer callback and leaving the promise pending until
  // the runner times out.
  await new Promise((resolve) => setTimeout(resolve, 10));

  // The <video> element should be reported exactly once, aggregated by tag name.
  expect(onUnsupportedContent).toHaveBeenCalledTimes(1);
  expect(onUnsupportedContent.mock.calls[0][0]).toEqual([
    expect.objectContaining({ tagName: 'VIDEO', count: 1 }),
  ]);
});

it('html option should initialize with editor instance', () => {
let initCompleted = false;

Expand Down
21 changes: 18 additions & 3 deletions packages/super-editor/src/core/Editor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1888,11 +1888,21 @@ export class Editor extends EventEmitter<EditorEventMap> {

// Check for markdown BEFORE html (since markdown gets converted to HTML)
if (this.options.markdown) {
doc = createDocFromMarkdown(this.options.markdown, this, { isImport: true, document: domDocument });
doc = createDocFromMarkdown(this.options.markdown, this, {
isImport: true,
document: domDocument,
onUnsupportedContent: this.options.onUnsupportedContent,
warnOnUnsupportedContent: this.options.warnOnUnsupportedContent,
});
}
// If we have a new doc, and have html data, we initialize from html
else if (this.options.html)
doc = createDocFromHTML(this.options.html, this, { isImport: true, document: domDocument });
doc = createDocFromHTML(this.options.html, this, {
isImport: true,
document: domDocument,
onUnsupportedContent: this.options.onUnsupportedContent,
warnOnUnsupportedContent: this.options.warnOnUnsupportedContent,
});
else if (this.options.jsonOverride) doc = this.schema.nodeFromJSON(this.options.jsonOverride);

if (fragment) doc = yXmlFragmentToProseMirrorRootNode(fragment, this.schema);
Expand All @@ -1902,7 +1912,12 @@ export class Editor extends EventEmitter<EditorEventMap> {
// If we are in HTML mode, we initialize from either content or html (or blank)
else if (mode === 'text' || mode === 'html') {
if (loadFromSchema && hasJsonContent(content)) doc = this.schema.nodeFromJSON(content);
else if (typeof content === 'string') doc = createDocFromHTML(content, this, { document: domDocument });
else if (typeof content === 'string')
doc = createDocFromHTML(content, this, {
document: domDocument,
onUnsupportedContent: this.options.onUnsupportedContent,
warnOnUnsupportedContent: this.options.warnOnUnsupportedContent,
});
else doc = this.schema.topNodeType.createAndFill()!;
}
} catch (err) {
Expand Down
4 changes: 4 additions & 0 deletions packages/super-editor/src/core/commands/insertContent.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import { processContent } from '../helpers/contentProcessor.js';
* @param {Object} [options={}] - Options for insertion.
* @param {string} [options.contentType] - The type of content being inserted: 'html', 'markdown', 'text', or 'schema'.
* @param {boolean} [options.parseOptions] - Additional options for parsing (if applicable).
* @param {((items: Array<{tagName: string, outerHTML: string, count: number}>) => void) | null} [options.onUnsupportedContent] - Callback for unsupported HTML elements. Falls back to editor.options.onUnsupportedContent.
* @param {boolean} [options.warnOnUnsupportedContent] - When true, emits console.warn for unsupported content. Falls back to editor.options.warnOnUnsupportedContent.
* @returns {function} A command function that can be executed by the editor.
*/
export const insertContent =
Expand All @@ -30,6 +32,8 @@ export const insertContent =
content: value,
type: options.contentType,
editor,
onUnsupportedContent: options.onUnsupportedContent ?? editor.options?.onUnsupportedContent,
warnOnUnsupportedContent: options.warnOnUnsupportedContent ?? editor.options?.warnOnUnsupportedContent,
});

const jsonContent = processedDoc.toJSON();
Expand Down
172 changes: 172 additions & 0 deletions packages/super-editor/src/core/helpers/catchAllSchema.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
//@ts-check
import { Schema } from 'prosemirror-model';

/**
 * One aggregated entry per distinct tag name reported as unsupported.
 *
 * @typedef {Object} UnsupportedContentItem
 * @property {string} tagName - upper-cased element name, e.g. "HR", "DETAILS"
 * @property {string} outerHTML - markup of the first reported instance; when longer than 200 chars it is cut to 200 chars and a trailing "…" is appended
 * @property {number} count - how many instances of this tagName were dropped
 */

// Name of the synthetic node type that getCatchAllSchema() appends to the schema copy.
const CATCH_ALL_NODE_NAME = '__supereditor__private__unknown__catch__all__node';
// Number of outerHTML characters kept in a reported item before truncation.
const MAX_OUTER_HTML_LENGTH = 200;

// Memoizes the derived catch-all schema for each base schema.
/** @type {WeakMap<Schema, Schema>} */
const catchAllSchemaCache = new WeakMap();

/**
 * Derives a schema from `baseSchema` that carries one extra block node whose
 * only parse rule is the wildcard selector `*`. The extra node is appended
 * after all base nodes and is intended to pick up elements that the base
 * schema has no rule for, so unsupported content can be detected.
 *
 * The derived schema is built once per base schema and then served from a
 * WeakMap cache.
 *
 * @param {Schema} baseSchema - The editor's real schema
 * @returns {Schema} The cached schema containing the additional catch-all node
 */
export function getCatchAllSchema(baseSchema) {
  const existing = catchAllSchemaCache.get(baseSchema);
  if (existing) return existing;

  const { topNode, marks, nodes } = baseSchema.spec;

  // Block-level node accepting inline content, matched by any element tag.
  const catchAllNodeSpec = {
    content: 'inline*',
    group: 'block',
    parseDOM: [{ tag: '*' }],
  };

  const extended = new Schema({
    topNode,
    marks,
    nodes: nodes.append({ [CATCH_ALL_NODE_NAME]: catchAllNodeSpec }),
  });

  catchAllSchemaCache.set(baseSchema, extended);
  return extended;
}

/**
 * Finds content under `element` that the schema has no parse rule for.
 *
 * The element's descendants are scanned by tag name against the set of tags
 * referenced by the schema's node and mark parse rules. Hits are aggregated
 * so that each distinct tag name yields a single item with a running count.
 *
 * @param {Element} element - Root DOM element whose descendants are inspected
 * @param {Schema} schema - The real editor schema
 * @returns {UnsupportedContentItem[]} One entry per unsupported tag name, in first-seen order
 */
export function detectUnsupportedContent(element, schema) {
  /** @type {Map<string, UnsupportedContentItem>} */
  const droppedByTag = new Map();

  scanForUnsupported(element, collectKnownTags(schema), droppedByTag);

  return [...droppedByTag.values()];
}

/** @type {WeakMap<Schema, Set<string>>} */
const knownTagsCache = new WeakMap();

/** Captures the element name a CSS selector starts with, e.g. "p" in "p[data-x]". */
const LEADING_TAG_NAME_PATTERN = /^([a-zA-Z][a-zA-Z0-9-]*)/;

/** Structural tags that wrap content rather than being dropped; always treated as known. */
const STRUCTURAL_TAGS = ['HTML', 'HEAD', 'BODY', 'DIV', 'SPAN'];

/**
 * Adds the upper-cased tag names referenced by one `parseDOM` spec to `tags`.
 *
 * A rule's `tag` is a CSS selector and may be a comma-separated selector list
 * (e.g. "b, strong"); every selector in the list contributes its leading tag
 * name. Selectors that do not start with a tag name (e.g. "[data-x]", "*")
 * contribute nothing.
 *
 * @param {unknown} parseDOM - Array of parse rules, or a function returning one
 *   (super-editor extensions may supply a function, which is non-standard)
 * @param {Set<string>} tags - Accumulator that receives the tag names
 */
function addTagsFromParseDOM(parseDOM, tags) {
  if (!parseDOM) return;

  const rules = typeof parseDOM === 'function' ? parseDOM() : /** @type {any[]} */ (parseDOM);
  for (const rule of rules) {
    // Style-only rules carry no tag; skip anything that is not a selector string.
    if (typeof rule.tag !== 'string') continue;

    for (const selector of rule.tag.split(',')) {
      const match = selector.trim().match(LEADING_TAG_NAME_PATTERN);
      if (match) tags.add(match[1].toUpperCase());
    }
  }
}

/**
 * Collect all tag names that the schema knows how to parse (cached per schema).
 *
 * Tag names come from the `parseDOM` rules of every node and mark type, plus a
 * fixed list of structural wrapper tags. All names are upper-cased.
 *
 * @param {Schema} schema
 * @returns {Set<string>} The cached set; the same instance is returned on later calls
 */
function collectKnownTags(schema) {
  const cached = knownTagsCache.get(schema);
  if (cached) return cached;

  /** @type {Set<string>} */
  const tags = new Set();

  // Nodes first, then marks; both kinds expose their rules under spec.parseDOM.
  for (const type of [...Object.values(schema.nodes), ...Object.values(schema.marks)]) {
    addTagsFromParseDOM(/** @type {unknown} */ (type.spec.parseDOM), tags);
  }

  for (const tag of STRUCTURAL_TAGS) {
    tags.add(tag);
  }

  knownTagsCache.set(schema, tags);
  return tags;
}

/**
 * Walks the element tree below `element` and records every element that is
 * both unknown to the schema and has nothing preservable inside it.
 *
 * An unknown element that still holds non-whitespace text or a known
 * descendant (e.g. `<thead>` wrapping `<tr>`) is treated as a transparent
 * wrapper: it is not reported, and the walk continues into its children.
 * Known elements are likewise descended into. A reported element's subtree is
 * not walked any further.
 *
 * @param {Element} element - Parent whose child elements are examined
 * @param {Set<string>} knownTags - Upper-cased tag names the schema can parse
 * @param {Map<string, UnsupportedContentItem>} itemsByTag - Accumulator keyed by
 *   upper-cased tag name; updated in place
 */
function scanForUnsupported(element, knownTags, itemsByTag) {
  for (const node of Array.from(element.children)) {
    const tagName = node.tagName.toUpperCase();

    const isDropped = !knownTags.has(tagName) && !hasPreservableContent(node, knownTags);
    if (!isDropped) {
      // Known tag or transparent wrapper: keep looking deeper.
      scanForUnsupported(node, knownTags, itemsByTag);
      continue;
    }

    const entry = itemsByTag.get(tagName);
    if (entry) {
      entry.count += 1;
      continue;
    }

    // First sighting of this tag: keep a (possibly shortened) markup sample.
    const markup = node.outerHTML;
    const sample =
      markup.length > MAX_OUTER_HTML_LENGTH ? markup.slice(0, MAX_OUTER_HTML_LENGTH) + '…' : markup;
    itemsByTag.set(tagName, { tagName, outerHTML: sample, count: 1 });
  }
}

/**
 * Tells whether anything inside `element` counts as preservable: either its
 * text content is more than whitespace, or some descendant element (at any
 * depth) has a tag name contained in `knownTags`.
 *
 * @param {Element} element - Element to inspect
 * @param {Set<string>} knownTags - Upper-cased tag names the schema can parse
 * @returns {boolean} `true` when text or a known descendant is present
 */
function hasPreservableContent(element, knownTags) {
  const text = element.textContent;
  if (text && text.trim() !== '') return true;

  return Array.from(element.children).some(
    (descendant) =>
      knownTags.has(descendant.tagName.toUpperCase()) || hasPreservableContent(descendant, knownTags),
  );
}
Loading
Loading