superdoc-dev · harbournick · Feb 19, 2026 · Feb 19, 2026
diff --git a/apps/docs/core/superdoc/configuration.mdx b/apps/docs/core/superdoc/configuration.mdx
@@ -324,6 +324,22 @@ new SuperDoc({
   Disable custom context menus
 </ParamField>
 
+<ParamField path="onUnsupportedContent" type="function">
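+  Callback invoked with the HTML elements that were dropped during import because they have no schema representation. Receives an array of `{ tagName, outerHTML, count }` items, one per tag name: `count` is the number of dropped elements with that tag, and `outerHTML` is a sample of the first one, truncated to about 200 characters. When provided, the `console.warn` output is suppressed.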
+
+  ```javascript
+  onUnsupportedContent: (items) => {
+    items.forEach(({ tagName, count }) => {
+      console.log(`Dropped ${count}x <${tagName}>`);
+    });
+  }
+  ```
+</ParamField>
+
+<ParamField path="warnOnUnsupportedContent" type="boolean" default="false">
+  Log a `console.warn` listing HTML elements dropped during import. Ignored when `onUnsupportedContent` is provided.
+</ParamField>
+
 <ParamField path="cspNonce" type="string">
   Content Security Policy nonce
 </ParamField>

diff --git a/apps/docs/core/supereditor/configuration.mdx b/apps/docs/core/supereditor/configuration.mdx
@@ -141,6 +141,22 @@ const editor = await Editor.open(file, {
   Use ProseMirror JSON content instead of DOCX parsing
 </ParamField>
 
+<ParamField path="onUnsupportedContent" type="function">
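+  Callback invoked with the HTML elements that were dropped during import because they have no schema representation. Receives an array of `{ tagName, outerHTML, count }` items, one per tag name: `count` is the number of dropped elements with that tag, and `outerHTML` is a sample of the first one, truncated to about 200 characters. When provided, the `console.warn` output is suppressed.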
+
+  ```javascript
+  onUnsupportedContent: (items) => {
+    items.forEach(({ tagName, count }) => {
+      console.log(`Dropped ${count}x <${tagName}>`);
+    });
+  }
+  ```
+</ParamField>
+
+<ParamField path="warnOnUnsupportedContent" type="boolean" default="false">
+  Log a `console.warn` listing HTML elements dropped during import. Ignored when `onUnsupportedContent` is provided.
+</ParamField>
+
 ## Features
 
 <ParamField path="isCommentsEnabled" type="boolean" default="false">

diff --git a/apps/docs/core/supereditor/methods.mdx b/apps/docs/core/supereditor/methods.mdx
@@ -542,6 +542,12 @@ Insert content with automatic format detection.
     <ParamField path="position" type="number | Object">
       Insert position (defaults to cursor)
     </ParamField>
+    <ParamField path="onUnsupportedContent" type="function">
+      Callback for HTML elements dropped during parsing. Receives `{ tagName, outerHTML, count }[]`. Falls back to the editor-level option if not set.
+    </ParamField>
+    <ParamField path="warnOnUnsupportedContent" type="boolean" default="false">
+      Log dropped elements via `console.warn`. Falls back to the editor-level option if not set.
+    </ParamField>
   </Expandable>
 </ParamField>
 

diff --git a/packages/super-editor/src/core/Editor.api-contracts.test.js b/packages/super-editor/src/core/Editor.api-contracts.test.js
@@ -117,6 +117,28 @@ describe('Editor - API Contracts (Regression Prevention)', () => {
       });
     });
 
+    it('docx markdown initialization forwards unsupported-content callback', () => {
+      const onUnsupportedContent = vi.fn();
+
+      ({ editor } = initTestEditor({
+        mode: 'docx',
+        content: '<p>Fallback content</p>',
+        markdown: '<video src="demo.mp4"></video>', // <video> is the element the assertions below expect to be reported
+        onUnsupportedContent,
+        useImmediateSetTimeout: false,
+      }));
+
+      return new Promise((resolve) => {
+        setTimeout(() => {
+          expect(onUnsupportedContent).toHaveBeenCalledTimes(1);
+          expect(onUnsupportedContent.mock.calls[0][0]).toEqual([
+            expect.objectContaining({ tagName: 'VIDEO', count: 1 }),
+          ]);
+          resolve();
+        }, 10); // NOTE(review): fixed 10ms wait — presumably initialization is deferred on a timer; confirm
+      });
+    });
+
     it('html option should initialize with editor instance', () => {
       let initCompleted = false;
 

diff --git a/packages/super-editor/src/core/Editor.ts b/packages/super-editor/src/core/Editor.ts
@@ -1888,11 +1888,21 @@ export class Editor extends EventEmitter<EditorEventMap> {
 
           // Check for markdown BEFORE html (since markdown gets converted to HTML)
           if (this.options.markdown) {
-            doc = createDocFromMarkdown(this.options.markdown, this, { isImport: true, document: domDocument });
+            doc = createDocFromMarkdown(this.options.markdown, this, {
+              isImport: true,
+              document: domDocument,
+              onUnsupportedContent: this.options.onUnsupportedContent,
+              warnOnUnsupportedContent: this.options.warnOnUnsupportedContent,
+            });
           }
           // If we have a new doc, and have html data, we initialize from html
           else if (this.options.html)
-            doc = createDocFromHTML(this.options.html, this, { isImport: true, document: domDocument });
+            doc = createDocFromHTML(this.options.html, this, {
+              isImport: true,
+              document: domDocument,
+              onUnsupportedContent: this.options.onUnsupportedContent,
+              warnOnUnsupportedContent: this.options.warnOnUnsupportedContent,
+            });
           else if (this.options.jsonOverride) doc = this.schema.nodeFromJSON(this.options.jsonOverride);
 
           if (fragment) doc = yXmlFragmentToProseMirrorRootNode(fragment, this.schema);
@@ -1902,7 +1912,12 @@ export class Editor extends EventEmitter<EditorEventMap> {
       // If we are in HTML mode, we initialize from either content or html (or blank)
       else if (mode === 'text' || mode === 'html') {
         if (loadFromSchema && hasJsonContent(content)) doc = this.schema.nodeFromJSON(content);
-        else if (typeof content === 'string') doc = createDocFromHTML(content, this, { document: domDocument });
+        else if (typeof content === 'string')
+          doc = createDocFromHTML(content, this, {
+            document: domDocument,
+            onUnsupportedContent: this.options.onUnsupportedContent,
+            warnOnUnsupportedContent: this.options.warnOnUnsupportedContent,
+          });
         else doc = this.schema.topNodeType.createAndFill()!;
       }
     } catch (err) {

diff --git a/packages/super-editor/src/core/commands/insertContent.js b/packages/super-editor/src/core/commands/insertContent.js
@@ -12,6 +12,8 @@ import { processContent } from '../helpers/contentProcessor.js';
  * @param {Object} [options={}] - Options for insertion.
  * @param {string} [options.contentType] - The type of content being inserted: 'html', 'markdown', 'text', or 'schema'.
  * @param {boolean} [options.parseOptions] - Additional options for parsing (if applicable).
+ * @param {((items: Array<{tagName: string, outerHTML: string, count: number}>) => void) | null} [options.onUnsupportedContent] - Callback for unsupported HTML elements. Falls back to editor.options.onUnsupportedContent.
+ * @param {boolean} [options.warnOnUnsupportedContent] - When true, emits console.warn for unsupported content. Falls back to editor.options.warnOnUnsupportedContent.
  * @returns {function} A command function that can be executed by the editor.
  */
 export const insertContent =
@@ -30,6 +32,8 @@ export const insertContent =
           content: value,
           type: options.contentType,
           editor,
+          onUnsupportedContent: options.onUnsupportedContent ?? editor.options?.onUnsupportedContent,
+          warnOnUnsupportedContent: options.warnOnUnsupportedContent ?? editor.options?.warnOnUnsupportedContent,
         });
 
         const jsonContent = processedDoc.toJSON();

diff --git a/packages/super-editor/src/core/helpers/catchAllSchema.js b/packages/super-editor/src/core/helpers/catchAllSchema.js
@@ -0,0 +1,172 @@
+//@ts-check
+import { Schema } from 'prosemirror-model';
+
+/**
+ * @typedef {Object} UnsupportedContentItem
+ * @property {string} tagName - e.g. "HR", "DETAILS"
+ * @property {string} outerHTML - outerHTML of the first dropped instance of this tag; cut to 200 chars and suffixed with "…" when longer
+ * @property {number} count - how many instances of this tagName were dropped
+ */
+
+const CATCH_ALL_NODE_NAME = '__supereditor__private__unknown__catch__all__node'; // name of the node type appended by getCatchAllSchema
+const MAX_OUTER_HTML_LENGTH = 200; // outerHTML longer than this is cut at this length and suffixed with "…"
+
+/**
+ * Cache of derived catch-all schemas, keyed by the base schema they were built from.
+ * @type {WeakMap<Schema, Schema>}
+ */
+const catchAllSchemaCache = new WeakMap();
+
+/**
+ * Returns a copy of the given schema with one extra catch-all node type appended.
+ * The copy is built on the first call for a given schema object and served from
+ * `catchAllSchemaCache` on later calls.
+ *
+ * The derived schema reuses the base schema's `topNode`, marks and nodes as-is; the
+ * appended node is a `block`-group node with `inline*` content whose only parse
+ * rule is `{ tag: '*' }`, intended to catch elements the real schema does not
+ * handle so that unsupported content can be detected.
+ * NOTE(review): whether the wildcard rule only sees elements no other rule claimed
+ * depends on ProseMirror's parse-rule ordering — confirm before relying on it.
+ *
+ * @param {Schema} baseSchema - The real editor schema; also used as the cache key.
+ * @returns {Schema} The cached derived schema containing the catch-all node.
+ */
+export function getCatchAllSchema(baseSchema) {
+  let cached = catchAllSchemaCache.get(baseSchema);
+  if (cached) return cached;
+
+  cached = new Schema({
+    topNode: baseSchema.spec.topNode,
+    marks: baseSchema.spec.marks,
+    nodes: baseSchema.spec.nodes.append({
+      [CATCH_ALL_NODE_NAME]: {
+        content: 'inline*',
+        group: 'block',
+        parseDOM: [{ tag: '*' }],
+      },
+    }),
+  });
+
+  catchAllSchemaCache.set(baseSchema, cached);
+  return cached;
+}
+
+/**
+ * Scans the descendants of an element for tags the schema has no parse rule for
+ * and returns the ones whose content would be lost, aggregated by tagName.
+ *
+ * Detection is a plain DOM walk against the schema's known tag names
+ * (`collectKnownTags` + `scanForUnsupported`); nothing is parsed with ProseMirror
+ * here and the catch-all schema is not used by this function.
+ * Only the element's descendants are inspected, never the element itself.
+ *
+ * @param {Element} element - The DOM element whose descendants are scanned
+ * @param {Schema} schema - The real editor schema
+ * @returns {UnsupportedContentItem[]} One entry per unsupported tag name, in the
+ *   order each tag was first encountered; an empty array when nothing was dropped
+ */
+export function detectUnsupportedContent(element, schema) {
+  /** @type {Map<string, UnsupportedContentItem>} */
+  const itemsByTag = new Map();
+
+  const knownTags = collectKnownTags(schema);
+  scanForUnsupported(element, knownTags, itemsByTag);
+
+  return Array.from(itemsByTag.values());
+}
+
+/**
+ * Cache of known-tag sets, keyed by the schema they were collected from.
+ * @type {WeakMap<Schema, Set<string>>}
+ */
+const knownTagsCache = new WeakMap();
+
+/**
+ * Collect all tag names that the schema knows how to parse (cached per schema).
+ *
+ * For every node and mark type, each `parseDOM` rule that has a `tag` selector
+ * contributes the element name at the start of that selector, upper-cased
+ * (e.g. a selector starting with `span` yields `SPAN`). Rules without a `tag`,
+ * and selectors that do not begin with an element name, contribute nothing.
+ * `HTML`, `HEAD`, `BODY`, `DIV` and `SPAN` are always included.
+ *
+ * The returned Set is the cached instance itself, so every caller passing the
+ * same schema shares it.
+ *
+ * @param {Schema} schema - Schema whose node and mark parse rules are inspected
+ * @returns {Set<string>} Upper-cased tag names treated as known
+ */
+function collectKnownTags(schema) {
+  const cached = knownTagsCache.get(schema);
+  if (cached) return cached;
+
+  const tags = new Set();
+
+  // Collect from nodes
+  // NOTE: parseDOM may be a function in super-editor extensions (non-standard),
+  // so we cast to unknown to keep the runtime guard while satisfying TS.
+  for (const nodeType of Object.values(schema.nodes)) {
+    const raw = /** @type {unknown} */ (nodeType.spec.parseDOM);
+    if (!raw) continue;
+    const rules = typeof raw === 'function' ? raw() : /** @type {any[]} */ (raw); // function form is called with no arguments
+    for (const rule of rules) {
+      if (rule.tag) {
+        const match = rule.tag.match(/^([a-zA-Z][a-zA-Z0-9-]*)/); // leading element name only; attribute/class parts are ignored
+        if (match) tags.add(match[1].toUpperCase());
+      }
+    }
+  }
+
+  // Collect from marks (same extraction as for nodes above)
+  for (const markType of Object.values(schema.marks)) {
+    const raw = /** @type {unknown} */ (markType.spec.parseDOM);
+    if (!raw) continue;
+    const rules = typeof raw === 'function' ? raw() : /** @type {any[]} */ (raw);
+    for (const rule of rules) {
+      if (rule.tag) {
+        const match = rule.tag.match(/^([a-zA-Z][a-zA-Z0-9-]*)/);
+        if (match) tags.add(match[1].toUpperCase());
+      }
+    }
+  }
+
+  // Always consider basic structural tags as known (they wrap content, not dropped)
+  for (const tag of ['HTML', 'HEAD', 'BODY', 'DIV', 'SPAN']) {
+    tags.add(tag);
+  }
+
+  knownTagsCache.set(schema, tags);
+  return tags;
+}
+
+/**
+ * Recursively scan the children of a DOM element for elements whose tag is not
+ * in the known set, recording the dropped ones in `itemsByTag`.
+ *
+ * When an unknown tag has descendants with known tags (e.g. `<thead>` wrapping
+ * `<tr>`), ProseMirror "looks through" the wrapper and parses the children.
+ * Those transparent wrappers are NOT reported — only unknown elements that have
+ * neither non-whitespace text nor any known descendant (truly dropped content)
+ * are reported. Transparent wrappers are still descended into, so dropped
+ * elements nested inside them are found.
+ *
+ * A reported element is not descended into, so unknown elements nested inside
+ * it are not counted separately. Each tag gets a single entry: the first
+ * instance supplies `outerHTML` (cut to `MAX_OUTER_HTML_LENGTH` characters plus
+ * "…" when longer) and later instances only increment `count`.
+ *
+ * @param {Element} element - Element whose children are scanned (the element itself is not checked)
+ * @param {Set<string>} knownTags - Upper-cased tag names the schema can parse
+ * @param {Map<string, UnsupportedContentItem>} itemsByTag - Accumulator keyed by upper-cased tag name; mutated in place
+ */
+function scanForUnsupported(element, knownTags, itemsByTag) {
+  for (let i = 0; i < element.children.length; i++) {
+    const child = element.children[i];
+    const tag = child.tagName.toUpperCase();
+
+    if (!knownTags.has(tag)) {
+      // ProseMirror "looks through" unknown wrappers and parses their
+      // children — including text nodes and known elements. Only report
+      // elements whose content is truly lost (no text, no known descendants).
+      if (hasPreservableContent(child, knownTags)) {
+        scanForUnsupported(child, knownTags, itemsByTag);
+        continue;
+      }
+
+      const existing = itemsByTag.get(tag);
+      if (existing) {
+        existing.count++; // tag already recorded: keep the first outerHTML sample, only bump the count
+      } else {
+        let outerHTML = child.outerHTML;
+        if (outerHTML.length > MAX_OUTER_HTML_LENGTH) {
+          outerHTML = outerHTML.slice(0, MAX_OUTER_HTML_LENGTH) + '…';
+        }
+        itemsByTag.set(tag, { tagName: tag, outerHTML, count: 1 });
+      }
+    } else {
+      // Known tag — recurse into children to find nested unsupported elements
+      scanForUnsupported(child, knownTags, itemsByTag);
+    }
+  }
+}
+
+/**
+ * Returns true if ProseMirror will preserve content from this element —
+ * either because it contains non-whitespace text or a known descendant element.
+ *
+ * The text check uses `textContent`, which covers the whole subtree, so
+ * non-whitespace text at any depth makes the element count as preservable.
+ * The element's own tag is not consulted; only its text and descendants are.
+ * NOTE(review): text inside descendants such as `<script>` or `<style>` is part
+ * of `textContent` too and therefore also counts here — confirm that is intended.
+ *
+ * @param {Element} element - Element to inspect
+ * @param {Set<string>} knownTags - Upper-cased tag names the schema can parse
+ * @returns {boolean} True when the element has non-whitespace text or a known descendant at any depth
+ */
+function hasPreservableContent(element, knownTags) {
+  if (element.textContent && element.textContent.trim().length > 0) return true;
+  for (let i = 0; i < element.children.length; i++) {
+    const child = element.children[i];
+    if (knownTags.has(child.tagName.toUpperCase())) return true;
+    if (hasPreservableContent(child, knownTags)) return true;
+  }
+  return false;
+}