Skip to content

Revert "[browser][HybridGlobalization] Improve speed performance of IndexOf and LastIndexOf text APIs with HybridGlobalization mode" #97035

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 0 additions & 14 deletions THIRD-PARTY-NOTICES.TXT
Original file line number Diff line number Diff line change
Expand Up @@ -1331,17 +1331,3 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Aspects of base64 encoding / decoding are based on algorithm described in "Base64 encoding and decoding at almost the speed of a memory
copy", Wojciech Muła and Daniel Lemire. https://arxiv.org/pdf/1910.05109.pdf

License for FormatJS Intl.Segmenter grapheme segmentation algorithm
--------------------------------------------------------------------------
Available at https://github.com/formatjs/formatjs/blob/58d6a7b398d776ca3d2726d72ae1573b65cc3bef/packages/intl-segmenter/LICENSE.md

MIT License

Copyright (c) 2022 FormatJS

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
3 changes: 1 addition & 2 deletions eng/liveBuilds.targets
Original file line number Diff line number Diff line change
Expand Up @@ -208,8 +208,7 @@
$(LibrariesNativeArtifactsPath)package.json;
$(LibrariesNativeArtifactsPath)dotnet.native.wasm;
$(LibrariesNativeArtifactsPath)dotnet.native.js.symbols;
$(LibrariesNativeArtifactsPath)*.dat;
$(LibrariesNativeArtifactsPath)segmentation-rules.json;"
$(LibrariesNativeArtifactsPath)*.dat;"
IsNative="true" />
<!-- for threaded wasm -->
<LibrariesRuntimeFiles Condition="'$(TargetOS)' == 'browser' and Exists('$(LibrariesNativeArtifactsPath)dotnet.native.worker.js')"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,6 @@
<PlatformManifestFileEntry Include="icudt_optimal.dat" IsNative="true" />
<PlatformManifestFileEntry Include="icudt_optimal_no_CJK.dat" IsNative="true" />
<PlatformManifestFileEntry Include="icudt_hybrid.dat" IsNative="true" />
<PlatformManifestFileEntry Include="segmentation-rules.json" IsNative="true" />
<PlatformManifestFileEntry Include="package.json" IsNative="true" />
<PlatformManifestFileEntry Include="dotnet.es6.pre.js" IsNative="true" />
<PlatformManifestFileEntry Include="dotnet.es6.lib.js" IsNative="true" />
Expand Down
3 changes: 1 addition & 2 deletions src/mono/browser/browser.proj
Original file line number Diff line number Diff line change
Expand Up @@ -357,8 +357,7 @@
<ItemGroup>
<ICULibNativeFiles Include="$(ICULibDir)/libicuuc.a;
$(ICULibDir)/libicui18n.a;
$(ICULibDir)/libicudata.a;
$(BrowserProjectRoot)runtime/hybrid-globalization/segmentation-rules.json" />
$(ICULibDir)/libicudata.a" />
<ICULibFiles Include="$(ICULibDir)/*.dat" />
</ItemGroup>
<PropertyGroup>
Expand Down
3 changes: 1 addition & 2 deletions src/mono/browser/build/BrowserWasmApp.targets
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,8 @@

<ItemGroup Condition="'$(InvariantGlobalization)' != 'true'">
<_HybridGlobalizationDataFiles Include="$(MicrosoftNetCoreAppRuntimePackRidNativeDir)icudt_hybrid.dat"/>
<_HybridGlobalizationDataFiles Include="$(MicrosoftNetCoreAppRuntimePackRidNativeDir)segmentation-rules.json"/>
<_IcuAvailableDataFiles Include="$(MicrosoftNetCoreAppRuntimePackRidNativeDir)icudt_*" Exclude="@(_HybridGlobalizationDataFiles);$(_WasmIcuDataFileName)"/>
<WasmIcuDataFileNames Condition="'$(HybridGlobalization)' == 'true'" Include="@(_HybridGlobalizationDataFiles)"/>
<WasmIcuDataFileNames Condition="'$(HybridGlobalization)' == 'true'" Include="$(MicrosoftNetCoreAppRuntimePackRidNativeDir)icudt_hybrid.dat"/>
<WasmIcuDataFileNames Condition="'$(HybridGlobalization)' != 'true' and '$(WasmIncludeFullIcuData)' == 'true'" Include="$(MicrosoftNetCoreAppRuntimePackRidNativeDir)icudt.dat"/>
<WasmIcuDataFileNames Condition="'$(HybridGlobalization)' != 'true' and '$(WasmIncludeFullIcuData)' != 'true' and '$(_WasmIcuDataFileName)' == ''" Include="@(_IcuAvailableDataFiles)"/>
<WasmIcuDataFileNames Condition="'$(HybridGlobalization)' != 'true' and '$(WasmIncludeFullIcuData)' != 'true' and '$(_WasmIcuDataFileName)' != ''" Include="$(_WasmIcuDataFileName)"/>
Expand Down
12 changes: 0 additions & 12 deletions src/mono/browser/runtime/assets.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import { endMeasure, MeasuredBlock, startMeasure } from "./profiler";
import { AssetEntryInternal } from "./types/internal";
import { AssetEntry } from "./types";
import { VoidPtr } from "./types/emscripten";
import { setSegmentationRulesFromJson } from "./hybrid-globalization/grapheme-segmenter";

// this needs to be run only after the onRuntimeInitialized event, when the memory is ready
export function instantiate_asset(asset: AssetEntry, url: string, bytes: Uint8Array): void {
Expand All @@ -26,7 +25,6 @@ export function instantiate_asset(asset: AssetEntry, url: string, bytes: Uint8Ar
case "dotnetwasm":
case "js-module-threads":
case "symbols":
case "segmentation-rules":
// do nothing
break;
case "resource":
Expand Down Expand Up @@ -106,16 +104,6 @@ export async function instantiate_symbols_asset(pendingAsset: AssetEntryInternal
}
}

/**
 * Awaits the pending download of the segmentation-rules asset, parses the
 * response body as JSON and hands the result to `setSegmentationRulesFromJson`.
 *
 * Best effort: a failure while downloading, parsing or applying the rules is
 * caught and reported through `mono_log_info` instead of being rethrown.
 *
 * @param pendingAsset asset entry whose `pendingDownloadInternal` carries the in-flight download.
 */
export async function instantiate_segmentation_rules_asset(pendingAsset: AssetEntryInternal): Promise<void> {
    try {
        const response = await pendingAsset.pendingDownloadInternal!.response;
        const json = await response.json();
        setSegmentationRulesFromJson(json);
    } catch (error: unknown) {
        // JSON.stringify(new Error("x")) yields "{}" because `message` and `stack` are
        // non-enumerable, so report the message explicitly for Error instances.
        const reason = error instanceof Error ? error.message : JSON.stringify(error);
        mono_log_info(`Error loading static json asset ${pendingAsset.name}: ${reason}`);
    }
}

export async function wait_for_all_assets() {
// wait for all assets in memory
await runtimeHelpers.allAssetsInMemory.promise;
Expand Down
3 changes: 1 addition & 2 deletions src/mono/browser/runtime/exports.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import { mono_bind_static_method } from "./net6-legacy/method-calls";
import { export_binding_api, export_internal_api, export_mono_api } from "./net6-legacy/exports-legacy";
import { initializeLegacyExports } from "./net6-legacy/globals";
import { mono_log_warn, mono_wasm_stringify_as_error_with_stack } from "./logging";
import { instantiate_asset, instantiate_symbols_asset, instantiate_segmentation_rules_asset } from "./assets";
import { instantiate_asset, instantiate_symbols_asset } from "./assets";
import { jiterpreter_dump_stats } from "./jiterpreter";
import { forceDisposeProxies } from "./gc-handles";

Expand All @@ -46,7 +46,6 @@ function initializeExports(globalObjects: GlobalObjects): RuntimeAPI {
instantiate_asset,
jiterpreter_dump_stats,
forceDisposeProxies,
instantiate_segmentation_rules_asset,
});

const API = export_api();
Expand Down
15 changes: 14 additions & 1 deletion src/mono/browser/runtime/hybrid-globalization/change-case.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@ import { MonoObject, MonoObjectRef, MonoString, MonoStringRef } from "../types/i
import { Int32Ptr } from "../types/emscripten";
import { wrap_error_root, wrap_no_error_root } from "../invoke-js";
import { localHeapViewU16, setU16_local } from "../memory";
import { isSurrogate } from "./helpers";

// Bounds of the UTF-16 surrogate ranges, stored as one-code-unit strings so that
// isSurrogate can compare them directly against str[i].
// High (leading) surrogates: U+D800..U+DBFF.
const SURROGATE_HIGHER_START = "\uD800";
const SURROGATE_HIGHER_END = "\uDBFF";
// Low (trailing) surrogates: U+DC00..U+DFFF.
const SURROGATE_LOWER_START = "\uDC00";
const SURROGATE_LOWER_END = "\uDFFF";

export function mono_wasm_change_case_invariant(src: number, srcLength: number, dst: number, dstLength: number, toUpper: number, is_exception: Int32Ptr, ex_address: MonoObjectRef): void {
const exceptionRoot = mono_wasm_new_external_root<MonoObject>(ex_address);
Expand Down Expand Up @@ -156,6 +160,15 @@ export function mono_wasm_change_case(culture: MonoStringRef, src: number, srcLe
}
}

/**
 * Reports whether a complete UTF-16 surrogate pair starts at `startIdx`:
 * the code unit at `startIdx` lies in the high-surrogate range and the
 * following code unit exists and lies in the low-surrogate range.
 *
 * @param str text to inspect.
 * @param startIdx index of the candidate leading code unit.
 * @returns true only when both code units of the pair are present.
 */
function isSurrogate(str: string, startIdx: number) : boolean
{
    const lead = str[startIdx];
    const leadInRange = SURROGATE_HIGHER_START <= lead && lead <= SURROGATE_HIGHER_END;
    if (!leadInRange)
        return false;

    // a lone high surrogate at the very end of the string is not a pair
    const trailIdx = startIdx+1;
    if (!(trailIdx < str.length))
        return false;

    const trail = str[trailIdx];
    return SURROGATE_LOWER_START <= trail && trail <= SURROGATE_LOWER_END;
}

function appendSurrogateToMemory(heapI16: Uint16Array, dst: number, surrogate: string, idx: number)
{
setU16_local(heapI16, dst + idx*2, surrogate.charCodeAt(0));
Expand Down
97 changes: 53 additions & 44 deletions src/mono/browser/runtime/hybrid-globalization/collations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@ import { monoStringToString, utf16ToString } from "../strings";
import { MonoObject, MonoObjectRef, MonoString, MonoStringRef } from "../types/internal";
import { Int32Ptr } from "../types/emscripten";
import { wrap_error_root, wrap_no_error_root } from "../invoke-js";
import { GraphemeSegmenter } from "./grapheme-segmenter";

const COMPARISON_ERROR = -2;
const INDEXING_ERROR = -1;
let graphemeSegmenterCached: GraphemeSegmenter | null;

export function mono_wasm_compare_string(culture: MonoStringRef, str1: number, str1Length: number, str2: number, str2Length: number, options: number, is_exception: Int32Ptr, ex_address: MonoObjectRef): number {
const cultureRoot = mono_wasm_new_external_root<MonoString>(culture),
Expand All @@ -22,7 +20,7 @@ export function mono_wasm_compare_string(culture: MonoStringRef, str1: number, s
const casePicker = (options & 0x1f);
const locale = cultureName ? cultureName : undefined;
wrap_no_error_root(is_exception, exceptionRoot);
return compareStrings(string1, string2, locale, casePicker);
return compare_strings(string1, string2, locale, casePicker);
}
catch (ex: any) {
wrap_error_root(is_exception, ex, exceptionRoot);
Expand All @@ -39,19 +37,19 @@ export function mono_wasm_starts_with(culture: MonoStringRef, str1: number, str1
exceptionRoot = mono_wasm_new_external_root<MonoObject>(ex_address);
try {
const cultureName = monoStringToString(cultureRoot);
const prefix = decodeToCleanString(str2, str2Length);
const prefix = decode_to_clean_string(str2, str2Length);
// no need to look for an empty string
if (prefix.length == 0)
return 1; // true

const source = decodeToCleanString(str1, str1Length);
const source = decode_to_clean_string(str1, str1Length);
if (source.length < prefix.length)
return 0; //false
const sourceOfPrefixLength = source.slice(0, prefix.length);

const casePicker = (options & 0x1f);
const locale = cultureName ? cultureName : undefined;
const result = compareStrings(sourceOfPrefixLength, prefix, locale, casePicker);
const result = compare_strings(sourceOfPrefixLength, prefix, locale, casePicker);
wrap_no_error_root(is_exception, exceptionRoot);
return result === 0 ? 1 : 0; // equals ? true : false
}
Expand All @@ -70,19 +68,19 @@ export function mono_wasm_ends_with(culture: MonoStringRef, str1: number, str1Le
exceptionRoot = mono_wasm_new_external_root<MonoObject>(ex_address);
try {
const cultureName = monoStringToString(cultureRoot);
const suffix = decodeToCleanString(str2, str2Length);
const suffix = decode_to_clean_string(str2, str2Length);
if (suffix.length == 0)
return 1; // true

const source = decodeToCleanString(str1, str1Length);
const source = decode_to_clean_string(str1, str1Length);
const diff = source.length - suffix.length;
if (diff < 0)
return 0; //false
const sourceOfSuffixLength = source.slice(diff, source.length);

const casePicker = (options & 0x1f);
const locale = cultureName ? cultureName : undefined;
const result = compareStrings(sourceOfSuffixLength, suffix, locale, casePicker);
const result = compare_strings(sourceOfSuffixLength, suffix, locale, casePicker);
wrap_no_error_root(is_exception, exceptionRoot);
return result === 0 ? 1 : 0; // equals ? true : false
}
Expand All @@ -102,57 +100,68 @@ export function mono_wasm_index_of(culture: MonoStringRef, needlePtr: number, ne
try {
const needle = utf16ToString(<any>needlePtr, <any>(needlePtr + 2 * needleLength));
// no need to look for an empty string
if (cleanString(needle).length == 0) {
if (clean_string(needle).length == 0) {
wrap_no_error_root(is_exception, exceptionRoot);
return fromBeginning ? 0 : srcLength;
}

const source = utf16ToString(<any>srcPtr, <any>(srcPtr + 2 * srcLength));
// no need to look in an empty string
if (cleanString(source).length == 0) {
if (clean_string(source).length == 0) {
wrap_no_error_root(is_exception, exceptionRoot);
return fromBeginning ? 0 : srcLength;
}
const cultureName = monoStringToString(cultureRoot);
const locale = cultureName ? cultureName : undefined;
const casePicker = (options & 0x1f);
let result = -1;

const graphemeSegmenter = graphemeSegmenterCached || (graphemeSegmenterCached = new GraphemeSegmenter());
const needleSegments = [];
let needleIdx = 0;

// Grapheme segmentation of needle string
while (needleIdx < needle.length) {
const needleGrapheme = graphemeSegmenter.nextGrapheme(needle, needleIdx);
needleSegments.push(needleGrapheme);
needleIdx += needleGrapheme.length;
}
const segmenter = new Intl.Segmenter(locale, { granularity: "grapheme" });
const needleSegments = Array.from(segmenter.segment(needle)).map(s => s.segment);
let i = 0;
let stop = false;
let result = -1;
let segmentWidth = 0;
let index = 0;
let nextIndex = 0;
while (!stop) {
// we need to restart the iterator in this outer loop because we have shifted it in the inner loop
const iteratorSrc = segmenter.segment(source.slice(i, source.length))[Symbol.iterator]();
let srcNext = iteratorSrc.next();

let srcIdx = 0;
while (srcIdx < source.length) {
const srcGrapheme = graphemeSegmenter.nextGrapheme(source, srcIdx);
srcIdx += srcGrapheme.length;
if (srcNext.done)
break;

if (!checkMatchFound(srcGrapheme, needleSegments[0], locale, casePicker)) {
continue;
let matchFound = check_match_found(srcNext.value.segment, needleSegments[0], locale, casePicker);
index = nextIndex;
srcNext = iteratorSrc.next();
if (srcNext.done) {
result = matchFound ? index : result;
break;
}
segmentWidth = srcNext.value.index;
nextIndex = index + segmentWidth;
if (matchFound) {
for (let j = 1; j < needleSegments.length; j++) {
if (srcNext.done) {
stop = true;
break;
}
matchFound = check_match_found(srcNext.value.segment, needleSegments[j], locale, casePicker);
if (!matchFound)
break;

let j;
let srcNextIdx = srcIdx;
for (j = 1; j < needleSegments.length; j++) {
const srcGrapheme = graphemeSegmenter.nextGrapheme(source, srcNextIdx);

if (!checkMatchFound(srcGrapheme, needleSegments[j], locale, casePicker)) {
break;
srcNext = iteratorSrc.next();
}
srcNextIdx += srcGrapheme.length;
if (stop)
break;
}
if (j === needleSegments.length) {
result = srcIdx - srcGrapheme.length;

if (matchFound) {
result = index;
if (fromBeginning)
break;
}
i = nextIndex;
}
wrap_no_error_root(is_exception, exceptionRoot);
return result;
Expand All @@ -166,12 +175,12 @@ export function mono_wasm_index_of(culture: MonoStringRef, needlePtr: number, ne
exceptionRoot.release();
}

function checkMatchFound(str1: string, str2: string, locale: string | undefined, casePicker: number): boolean {
return compareStrings(str1, str2, locale, casePicker) === 0;
function check_match_found(str1: string, str2: string, locale: string | undefined, casePicker: number): boolean {
return compare_strings(str1, str2, locale, casePicker) === 0;
}
}

function compareStrings(string1: string, string2: string, locale: string | undefined, casePicker: number): number {
function compare_strings(string1: string, string2: string, locale: string | undefined, casePicker: number): number {
switch (casePicker) {
case 0:
// 0: None - default algorithm for the platform OR
Expand Down Expand Up @@ -263,12 +272,12 @@ function compareStrings(string1: string, string2: string, locale: string | undef
}
}

function decodeToCleanString(strPtr: number, strLen: number) {
function decode_to_clean_string(strPtr: number, strLen: number) {
const str = utf16ToString(<any>strPtr, <any>(strPtr + 2 * strLen));
return cleanString(str);
return clean_string(str);
}

function cleanString(str: string) {
function clean_string(str: string) {
const nStr = str.normalize();
return nStr.replace(/[\u200B-\u200D\uFEFF\0]/g, "");
}
Loading