Skip to content

Commit

Permalink
ComboboxControl: Simplify string normalization (#60893)
Browse files Browse the repository at this point in the history
* ComboboxControl: Simplify string normalization

* CHANGELOG

* Update CHANGELOG

* Try literal notation

* Add inline comment to clarify regex

* Fix test comments

Co-authored-by: tyxla <tyxla@git.wordpress.org>
Co-authored-by: Mamaduka <mamaduka@git.wordpress.org>
  • Loading branch information
3 people authored Apr 23, 2024
1 parent 05fbb73 commit 69adf95
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 59 deletions.
4 changes: 4 additions & 0 deletions packages/components/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@

- `SlotFill`: fixed missing `getServerSnapshot` parameter in slot map ([#60943](https://github.com/WordPress/gutenberg/pull/60943)).

### Enhancements

- `ComboboxControl`: Simplify string normalization ([#60893](https://github.com/WordPress/gutenberg/pull/60893)).

## 27.4.0 (2024-04-19)

### Deprecation
Expand Down
70 changes: 11 additions & 59 deletions packages/components/src/utils/strings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,66 +4,18 @@
import removeAccents from 'remove-accents';
import { paramCase } from 'change-case';

/**
* All unicode characters that we consider "dash-like":
* - `\u007e`: ~ (tilde)
* - `\u00ad`: ­ (soft hyphen)
* - `\u2053`: ⁓ (swung dash)
* - `\u207b`: ⁻ (superscript minus)
* - `\u208b`: ₋ (subscript minus)
* - `\u2212`: − (minus sign)
* - `\p{Pd}`: any character in the Unicode "Dash Punctuation" (Pd) general category
*/
const ALL_UNICODE_DASH_CHARACTERS = new RegExp(
`[${ [
// - (hyphen-minus)
'\u002d',
// ~ (tilde)
'\u007e',
// ­ (soft hyphen)
'\u00ad',
// ֊ (armenian hyphen)
'\u058a',
// ־ (hebrew punctuation maqaf)
'\u05be',
// ᐀ (canadian syllabics hyphen)
'\u1400',
// ᠆ (mongolian todo soft hyphen)
'\u1806',
// ‐ (hyphen)
'\u2010',
// ‑ (non-breaking hyphen)
'\u2011',
// ‒ (figure dash)
'\u2012',
// – (en dash)
'\u2013',
// — (em dash)
'\u2014',
// ― (horizontal bar)
'\u2015',
// ⁓ (swung dash)
'\u2053',
// ⁻ (superscript minus)
'\u207b',
// ₋ (subscript minus)
'\u208b',
// − (minus sign)
'\u2212',
// ⸗ (double oblique hyphen)
'\u2e17',
// ⸺ (two-em dash)
'\u2e3a',
// ⸻ (three-em dash)
'\u2e3b',
// 〜 (wave dash)
'\u301c',
// 〰 (wavy dash)
'\u3030',
// ゠ (katakana-hiragana double hyphen)
'\u30a0',
// ︱ (presentation form for vertical em dash)
'\ufe31',
// ︲ (presentation form for vertical en dash)
'\ufe32',
// ﹘ (small em dash)
'\ufe58',
// ﹣ (small hyphen-minus)
'\ufe63',
// － (fullwidth hyphen-minus)
'\uff0d',
].join( '' ) }]`,
'g'
/[\u007e\u00ad\u2053\u207b\u208b\u2212\p{Pd}]/gu
);

export const normalizeTextString = ( value: string ): string => {
Expand Down
62 changes: 62 additions & 0 deletions packages/components/src/utils/test/strings.js
Original file line number Diff line number Diff line change
Expand Up @@ -106,5 +106,67 @@ describe( 'normalizeTextString', () => {
expect( normalizeTextString( 'foo⸻bar' ) ).toBe( 'foo-bar' );
expect( normalizeTextString( 'foo゠bar' ) ).toBe( 'foo-bar' );
expect( normalizeTextString( 'foo-bar' ) ).toBe( 'foo-bar' );

const dashCharacters = [
// - (hyphen-minus)
'\u002d',
// ~ (tilde)
'\u007e',
// ­ (soft hyphen)
'\u00ad',
// ֊ (armenian hyphen)
'\u058a',
// ־ (hebrew punctuation maqaf)
'\u05be',
// ᐀ (canadian syllabics hyphen)
'\u1400',
// ᠆ (mongolian todo soft hyphen)
'\u1806',
// ‐ (hyphen)
'\u2010',
// ‑ (non-breaking hyphen)
'\u2011',
// ‒ (figure dash)
'\u2012',
// – (en dash)
'\u2013',
// — (em dash)
'\u2014',
// ― (horizontal bar)
'\u2015',
// ⁓ (swung dash)
'\u2053',
// ⁻ (superscript minus)
'\u207b',
// ₋ (subscript minus)
'\u208b',
// − (minus sign)
'\u2212',
// ⸗ (double oblique hyphen)
'\u2e17',
// ⸺ (two-em dash)
'\u2e3a',
// ⸻ (three-em dash)
'\u2e3b',
// 〜 (wave dash)
'\u301c',
// 〰 (wavy dash)
'\u3030',
// ゠ (katakana-hiragana double hyphen)
'\u30a0',
// ︱ (presentation form for vertical em dash)
'\ufe31',
// ︲ (presentation form for vertical en dash)
'\ufe32',
// ﹘ (small em dash)
'\ufe58',
// ﹣ (small hyphen-minus)
'\ufe63',
// － (fullwidth hyphen-minus)
'\uff0d',
];
expect( normalizeTextString( dashCharacters.join( '' ) ) ).toBe(
'-'.repeat( dashCharacters.length )
);
} );
} );

0 comments on commit 69adf95

Please sign in to comment.