From af3a386eada49949488e93ffe7dd50a53f15211f Mon Sep 17 00:00:00 2001 From: Sukka Date: Wed, 18 Dec 2019 16:50:30 +0800 Subject: [PATCH] feat(tocObj): bring up (#137) * feat(tocObj): bring up toc parser https://github.com/hexojs/hexo-util/issues/136 * docs(tocObj): bring up * feat(tocObj): export tocObj * refactor(tocObj): update min_depth * docs(tocObj): 'min_depth' defaults to 1 * test(tocObj): fix escape test * style: eslint * test(tocObj): fix escape test * style: arrow-parens --- README.md | 62 ++++++++++++++++++++++++++++++++++++++++++ lib/index.js | 1 + lib/toc_obj.js | 43 +++++++++++++++++++++++++++++ package.json | 1 + test/toc_obj.spec.js | 65 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 172 insertions(+) create mode 100644 lib/toc_obj.js create mode 100644 test/toc_obj.spec.js diff --git a/README.md b/README.md index 3be27e25..ff2341dc 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,7 @@ Utilities for [Hexo]. - [spawn](#spawncommand-args-options) - [stripHTML](#striphtmlstr) - [wordWrap](#wordwrapstr-options) +- [tocObj](#tocobjstr-options) - [truncate](#truncatestr-options) - [unescapeHTML](#unescapehtmlstr) - [url_for](#url_forpath-option) @@ -467,6 +468,67 @@ wordWrap('Once upon a time', {width: 1}) // Once\nupon\na\ntime ``` +### tocObj(str, [options]) + +Generate a table of contents in JSON format based on the given html string. + +Option | Description | Default +--- | --- | --- +`min_depth` | The minimum level of TOC | 1 +`max_depth` | The maximum level of TOC | 6 + + +``` js +const html = [ + '

<h1 id="title_1">Title 1</h1>

', + '

<h2 id="title_1_1">Title 1.1</h2>

', + '

<h3 id="title_1_1_1">Title 1.1.1</h3>

', + '

<h2 id="title_1_2">Title 1.2</h2>

', + '

<h2 id="title_1_3">Title 1.3</h2>

', + '

<h3 id="title_1_3_1">Title 1.3.1</h3>

', + '

<h1 id="title_2">Title 2</h1>

', + '

<h2 id="title_2_1">Title 2.1</h2>

' +].join('\n'); + +tocObj(html); +/* +[ + { text: 'Title 1', id: 'title_1', level: 1 }, + { text: 'Title 1.1', id: 'title_1_1', level: 2 }, + { text: 'Title 1.1.1', id: 'title_1_1_1', level: 3 }, + { text: 'Title 1.2', id: 'title_1_2', level: 2 }, + { text: 'Title 1.3', id: 'title_1_3', level: 2 }, + { text: 'Title 1.3.1', id: 'title_1_3_1', level: 3 }, + { text: 'Title 2', id: 'title_2', level: 1 }, + { text: 'Title 2.1', id: 'title_2_1', level: 2 }, +] +*/ + +tocObj(html, { min_depth: 2 }); +/* +[ + { text: 'Title 1.1', id: 'title_1_1', level: 2 }, + { text: 'Title 1.1.1', id: 'title_1_1_1', level: 3 }, + { text: 'Title 1.2', id: 'title_1_2', level: 2 }, + { text: 'Title 1.3', id: 'title_1_3', level: 2 }, + { text: 'Title 1.3.1', id: 'title_1_3_1', level: 3 }, + { text: 'Title 2.1', id: 'title_2_1', level: 2 }, +] +*/ + +tocObj(html, { max_depth: 2 }); +/* +[ + { text: 'Title 1', id: 'title_1', level: 1 }, + { text: 'Title 1.1', id: 'title_1_1', level: 2 }, + { text: 'Title 1.2', id: 'title_1_2', level: 2 }, + { text: 'Title 1.3', id: 'title_1_3', level: 2 }, + { text: 'Title 2', id: 'title_2', level: 1 }, + { text: 'Title 2.1', id: 'title_2_1', level: 2 }, +] +*/ +``` + ### truncate(str, [options]) Truncates a given text after a given `length` if text is longer than `length`. The last characters will be replaced with the `omission` option for a total length not exceeding `length`. 
diff --git a/lib/index.js b/lib/index.js index 5323d2f0..37c5a94e 100644 --- a/lib/index.js +++ b/lib/index.js @@ -26,6 +26,7 @@ exports.relative_url = require('./relative_url'); exports.slugize = require('./slugize'); exports.spawn = require('./spawn'); exports.stripHTML = require('./strip_html'); +exports.tocObj = require('./toc_obj'); exports.truncate = require('./truncate'); exports.unescapeHTML = require('./unescape_html'); exports.url_for = require('./url_for'); diff --git a/lib/toc_obj.js b/lib/toc_obj.js new file mode 100644 index 00000000..b219933f --- /dev/null +++ b/lib/toc_obj.js @@ -0,0 +1,43 @@ +'use strict'; +const { DomHandler, DomUtils, Parser } = require('htmlparser2'); +const escapeHTML = require('./escape_html'); + +const parseHtml = html => { + const handler = new DomHandler(null, {}); + new Parser(handler, {}).end(html); + return handler.dom; +}; + +const getId = ele => { + const { id } = ele.attribs; + const { parent } = ele; + return id || (parent.length < 1 ? null : getId(parent)); +}; + +function tocObj(str, options = {}) { + options = Object.assign({ + min_depth: 1, + max_depth: 6 + }, options); + + const headingsSelector = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].slice(options.min_depth - 1, options.max_depth).join(','); + + const dom = parseHtml(str); + const headings = DomUtils.find(el => headingsSelector.includes(el.tagName), dom, true); + + const result = []; + + if (!headings.length) return result; + + for (const el of headings) { + const level = +el.name[1]; + const id = getId(el); + const text = escapeHTML(DomUtils.getText(el)); + + result.push({ text, id, level }); + } + + return result; +} + +module.exports = tocObj; diff --git a/package.json b/package.json index 492d002f..adc2994a 100644 --- a/package.json +++ b/package.json @@ -48,6 +48,7 @@ "cross-spawn": "^7.0.0", "deepmerge": "^4.2.2", "highlight.js": "^9.13.1", + "htmlparser2": "^4.0.0", "punycode.js": "^2.1.0", "striptags": "^3.1.1" }, diff --git a/test/toc_obj.spec.js 
b/test/toc_obj.spec.js new file mode 100644 index 00000000..d8fdf614 --- /dev/null +++ b/test/toc_obj.spec.js @@ -0,0 +1,65 @@ +'use strict'; + +require('chai').should(); + +describe('tocObj', () => { + const tocObj = require('../lib/toc_obj'); + + const html = [ + '

<h1 id="title_1">Title 1</h1>

', + '

<h2 id="title_1_1">Title 1.1</h2>

', + '

<h3 id="title_1_1_1">Title 1.1.1</h3>

', + '

<h2 id="title_1_2">Title 1.2</h2>

', + '

<h2 id="title_1_3">Title 1.3</h2>

', + '

<h3 id="title_1_3_1">Title 1.3.1</h3>

', + '

<h1 id="title_2">Title 2</h1>

', + '

<h2 id="title_2_1">Title 2.1</h2>

', + '

<h1 id="title_3">Title should escape &, \', < and "</h1>

', + '

<h1 id="title_4">Chapter 1 should be printed to toc</h1>

' + ].join(''); + + it('default', () => { + const expected = [ + { text: 'Title 1', id: 'title_1', level: 1 }, + { text: 'Title 1.1', id: 'title_1_1', level: 2 }, + { text: 'Title 1.1.1', id: 'title_1_1_1', level: 3 }, + { text: 'Title 1.2', id: 'title_1_2', level: 2 }, + { text: 'Title 1.3', id: 'title_1_3', level: 2 }, + { text: 'Title 1.3.1', id: 'title_1_3_1', level: 3 }, + { text: 'Title 2', id: 'title_2', level: 1 }, + { text: 'Title 2.1', id: 'title_2_1', level: 2 }, + { text: 'Title should escape &amp;, &#39;, &lt; and &quot;', id: 'title_3', level: 1 }, + { text: 'Chapter 1 should be printed to toc', id: 'title_4', level: 1 } + ]; + + tocObj(html).should.eql(expected); + }); + + it('options - min_depth', () => { + const expected = [ + { text: 'Title 1.1', id: 'title_1_1', level: 2 }, + { text: 'Title 1.1.1', id: 'title_1_1_1', level: 3 }, + { text: 'Title 1.2', id: 'title_1_2', level: 2 }, + { text: 'Title 1.3', id: 'title_1_3', level: 2 }, + { text: 'Title 1.3.1', id: 'title_1_3_1', level: 3 }, + { text: 'Title 2.1', id: 'title_2_1', level: 2 } + ]; + + tocObj(html, { min_depth: 2 }).should.eql(expected); + }); + + it('options - max_depth', () => { + const expected = [ + { text: 'Title 1', id: 'title_1', level: 1 }, + { text: 'Title 1.1', id: 'title_1_1', level: 2 }, + { text: 'Title 1.2', id: 'title_1_2', level: 2 }, + { text: 'Title 1.3', id: 'title_1_3', level: 2 }, + { text: 'Title 2', id: 'title_2', level: 1 }, + { text: 'Title 2.1', id: 'title_2_1', level: 2 }, + { text: 'Title should escape &amp;, &#39;, &lt; and &quot;', id: 'title_3', level: 1 }, + { text: 'Chapter 1 should be printed to toc', id: 'title_4', level: 1 } + ]; + + tocObj(html, { max_depth: 2 }).should.eql(expected); + }); +});