Skip to content

Commit 69655f4

Browse files
committed
Implement toLowerCase and toUpperCase built-in functions.
Related issue: #323 JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg@inf.u-szeged.hu
1 parent bc0ca7b commit 69655f4

File tree

5 files changed

+307
-5
lines changed

5 files changed

+307
-5
lines changed

jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.cpp

Lines changed: 163 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "ecma-try-catch-macro.h"
2828
#include "jrt.h"
2929
#include "jrt-libc-includes.h"
30+
#include "lit-char-helpers.h"
3031

3132
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_STRING_BUILTIN
3233

@@ -507,6 +508,164 @@ ecma_builtin_string_prototype_object_substring (ecma_value_t this_arg, /**< this
507508
return ret_value;
508509
} /* ecma_builtin_string_prototype_object_substring */
509510

511+
/**
512+
* Helper function to convert a string to upper or lower case.
513+
*
514+
* @return completion value
515+
* Returned value must be freed with ecma_free_completion_value.
516+
*/
517+
static ecma_completion_value_t
518+
ecma_builtin_string_prototype_object_conversion_helper (ecma_value_t this_arg, /**< this argument */
519+
bool lower_case) /**< convert to lower (true)
520+
* or upper (false) case */
521+
{
522+
ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();
523+
524+
/* 1. */
525+
ECMA_TRY_CATCH (check_coercible_val,
526+
ecma_op_check_object_coercible (this_arg),
527+
ret_value);
528+
529+
/* 2. */
530+
ECMA_TRY_CATCH (to_string_val,
531+
ecma_op_to_string (this_arg),
532+
ret_value);
533+
534+
/* 3. */
535+
ecma_string_t *input_string_p = ecma_get_string_from_value (to_string_val);
536+
lit_utf8_size_t input_size = ecma_string_get_size (input_string_p);
537+
538+
MEM_DEFINE_LOCAL_ARRAY (input_start_p,
539+
input_size,
540+
lit_utf8_byte_t);
541+
542+
ecma_string_to_utf8_string (input_string_p,
543+
input_start_p,
544+
(ssize_t) (input_size));
545+
546+
/*
547+
* The URI encoding has two major phases: first we compute
548+
* the length of the lower case string, then we encode it.
549+
*/
550+
551+
lit_utf8_size_t output_length = 0;
552+
lit_utf8_iterator_t input_iterator = lit_utf8_iterator_create (input_start_p, input_size);
553+
554+
while (!lit_utf8_iterator_is_eos (&input_iterator))
555+
{
556+
ecma_char_t character = lit_utf8_iterator_read_next (&input_iterator);
557+
ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
558+
lit_utf8_byte_t utf8_byte_buffer[LIT_UTF8_MAX_BYTES_IN_CODE_POINT];
559+
lit_utf8_size_t character_length;
560+
561+
/*
562+
* We need to keep surrogate pairs. Surrogates are never converted,
563+
* regardless they form a valid pair or not.
564+
*/
565+
if (lit_is_code_unit_high_surrogate (character))
566+
{
567+
ecma_char_t next_character = lit_utf8_iterator_peek_next (&input_iterator);
568+
569+
if (lit_is_code_unit_low_surrogate (next_character))
570+
{
571+
lit_code_point_t surrogate_code_point = lit_convert_surrogate_pair_to_code_point (character, next_character);
572+
output_length += lit_code_point_to_utf8 (surrogate_code_point, utf8_byte_buffer);
573+
lit_utf8_iterator_incr (&input_iterator);
574+
continue;
575+
}
576+
}
577+
578+
if (lower_case)
579+
{
580+
character_length = lit_char_to_lower_case (character,
581+
character_buffer,
582+
LIT_MAXIMUM_OTHER_CASE_LENGTH);
583+
}
584+
else
585+
{
586+
character_length = lit_char_to_upper_case (character,
587+
character_buffer,
588+
LIT_MAXIMUM_OTHER_CASE_LENGTH);
589+
}
590+
591+
JERRY_ASSERT (character_length >= 1 && character_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);
592+
593+
for (lit_utf8_size_t i = 0; i < character_length; i++)
594+
{
595+
output_length += lit_code_unit_to_utf8 (character_buffer[i], utf8_byte_buffer);
596+
}
597+
}
598+
599+
/* Second phase. */
600+
601+
MEM_DEFINE_LOCAL_ARRAY (output_start_p,
602+
output_length,
603+
lit_utf8_byte_t);
604+
605+
lit_utf8_byte_t *output_char_p = output_start_p;
606+
607+
/* Encoding the output. */
608+
lit_utf8_iterator_seek_bos (&input_iterator);
609+
610+
while (!lit_utf8_iterator_is_eos (&input_iterator))
611+
{
612+
ecma_char_t character = lit_utf8_iterator_read_next (&input_iterator);
613+
ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
614+
lit_utf8_size_t character_length;
615+
616+
/*
617+
* We need to keep surrogate pairs. Surrogates are never converted,
618+
* regardless they form a valid pair or not.
619+
*/
620+
if (lit_is_code_unit_high_surrogate (character))
621+
{
622+
ecma_char_t next_character = lit_utf8_iterator_peek_next (&input_iterator);
623+
624+
if (lit_is_code_unit_low_surrogate (next_character))
625+
{
626+
lit_code_point_t surrogate_code_point = lit_convert_surrogate_pair_to_code_point (character, next_character);
627+
output_char_p += lit_code_point_to_utf8 (surrogate_code_point, output_char_p);
628+
lit_utf8_iterator_incr (&input_iterator);
629+
continue;
630+
}
631+
}
632+
633+
if (lower_case)
634+
{
635+
character_length = lit_char_to_lower_case (character,
636+
character_buffer,
637+
LIT_MAXIMUM_OTHER_CASE_LENGTH);
638+
}
639+
else
640+
{
641+
character_length = lit_char_to_upper_case (character,
642+
character_buffer,
643+
LIT_MAXIMUM_OTHER_CASE_LENGTH);
644+
}
645+
646+
JERRY_ASSERT (character_length >= 1 && character_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);
647+
648+
for (lit_utf8_size_t i = 0; i < character_length; i++)
649+
{
650+
output_char_p += lit_code_point_to_utf8 (character_buffer[i], output_char_p);
651+
}
652+
}
653+
654+
JERRY_ASSERT (output_start_p + output_length == output_char_p);
655+
656+
ecma_string_t *output_string_p = ecma_new_ecma_string_from_utf8 (output_start_p, output_length);
657+
658+
ret_value = ecma_make_normal_completion_value (ecma_make_string_value (output_string_p));
659+
660+
MEM_FINALIZE_LOCAL_ARRAY (output_start_p);
661+
MEM_FINALIZE_LOCAL_ARRAY (input_start_p);
662+
663+
ECMA_FINALIZE (to_string_val);
664+
ECMA_FINALIZE (check_coercible_val);
665+
666+
return ret_value;
667+
} /* ecma_builtin_string_prototype_object_conversion_helper */
668+
510669
/**
511670
* The String.prototype object's 'toLowerCase' routine
512671
*
@@ -519,7 +678,7 @@ ecma_builtin_string_prototype_object_substring (ecma_value_t this_arg, /**< this
519678
static ecma_completion_value_t
520679
ecma_builtin_string_prototype_object_to_lower_case (ecma_value_t this_arg) /**< this argument */
521680
{
522-
ECMA_BUILTIN_CP_UNIMPLEMENTED (this_arg);
681+
return ecma_builtin_string_prototype_object_conversion_helper (this_arg, true);
523682
} /* ecma_builtin_string_prototype_object_to_lower_case */
524683

525684
/**
@@ -534,7 +693,7 @@ ecma_builtin_string_prototype_object_to_lower_case (ecma_value_t this_arg) /**<
534693
static ecma_completion_value_t
535694
ecma_builtin_string_prototype_object_to_locale_lower_case (ecma_value_t this_arg) /**< this argument */
536695
{
537-
ECMA_BUILTIN_CP_UNIMPLEMENTED (this_arg);
696+
return ecma_builtin_string_prototype_object_conversion_helper (this_arg, true);
538697
} /* ecma_builtin_string_prototype_object_to_locale_lower_case */
539698

540699
/**
@@ -549,7 +708,7 @@ ecma_builtin_string_prototype_object_to_locale_lower_case (ecma_value_t this_arg
549708
static ecma_completion_value_t
550709
ecma_builtin_string_prototype_object_to_upper_case (ecma_value_t this_arg) /**< this argument */
551710
{
552-
ECMA_BUILTIN_CP_UNIMPLEMENTED (this_arg);
711+
return ecma_builtin_string_prototype_object_conversion_helper (this_arg, false);
553712
} /* ecma_builtin_string_prototype_object_to_upper_case */
554713

555714
/**
@@ -564,7 +723,7 @@ ecma_builtin_string_prototype_object_to_upper_case (ecma_value_t this_arg) /**<
564723
static ecma_completion_value_t
565724
ecma_builtin_string_prototype_object_to_locale_upper_case (ecma_value_t this_arg) /**< this argument */
566725
{
567-
ECMA_BUILTIN_CP_UNIMPLEMENTED (this_arg);
726+
return ecma_builtin_string_prototype_object_conversion_helper (this_arg, false);
568727
} /* ecma_builtin_string_prototype_object_to_locale_upper_case */
569728

570729
/**

jerry-core/lit/lit-char-helpers.cpp

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,3 +328,79 @@ lit_char_is_word_char (ecma_char_t c) /**< code unit */
328328
|| (c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END)
329329
|| c == LIT_CHAR_UNDERSCORE);
330330
} /* lit_char_is_word_char */
331+
332+
/**
333+
* Returns the lowercase character sequence of an ecma character.
334+
*
335+
* Note: output_buffer_p must be able to hold at least LIT_MAXIMUM_OTHER_CASE_LENGTH characters.
336+
*
337+
* @return the length of the lowercase character sequence
338+
* which is always between 1 and LIT_MAXIMUM_OTHER_CASE_LENGTH.
339+
*/
340+
lit_utf8_size_t
341+
lit_char_to_lower_case (ecma_char_t character, /**< input character value */
342+
ecma_char_t *output_buffer_p, /**< buffer for the result characters */
343+
size_t buffer_size) /**< buffer size */
344+
{
345+
TODO ("Needs a proper lower case implementation. See issue #323.");
346+
347+
JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH);
348+
349+
if (character >= LIT_CHAR_UPPERCASE_A && character <= LIT_CHAR_UPPERCASE_Z)
350+
{
351+
output_buffer_p[0] = (ecma_char_t) (character + (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
352+
return 1;
353+
}
354+
355+
if (character == 0x130)
356+
{
357+
output_buffer_p[0] = LIT_CHAR_LOWERCASE_I;
358+
output_buffer_p[1] = 0x307;
359+
return 2;
360+
}
361+
362+
output_buffer_p[0] = character;
363+
return 1;
364+
} /* lit_char_to_lower_case */
365+
366+
/**
367+
* Returns the uppercase character sequence of an ecma character.
368+
*
369+
* Note: output_buffer_p must be able to hold at least LIT_MAXIMUM_OTHER_CASE_LENGTH characters.
370+
*
371+
* @return the length of the uppercase character sequence
372+
* which is always between 1 and LIT_MAXIMUM_OTHER_CASE_LENGTH.
373+
*/
374+
lit_utf8_size_t
375+
lit_char_to_upper_case (ecma_char_t character, /**< input character value */
376+
ecma_char_t *output_buffer_p, /**< buffer for the result characters */
377+
size_t buffer_size) /**< buffer size */
378+
{
379+
TODO ("Needs a proper upper case implementation. See issue #323.");
380+
381+
JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH);
382+
383+
if (character >= LIT_CHAR_LOWERCASE_A && character <= LIT_CHAR_LOWERCASE_Z)
384+
{
385+
output_buffer_p[0] = (ecma_char_t) (character - (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
386+
return 1;
387+
}
388+
389+
if (character == 0xdf)
390+
{
391+
output_buffer_p[0] = LIT_CHAR_UPPERCASE_S;
392+
output_buffer_p[1] = LIT_CHAR_UPPERCASE_S;
393+
return 2;
394+
}
395+
396+
if (character == 0x1fd7)
397+
{
398+
output_buffer_p[0] = 0x399;
399+
output_buffer_p[1] = 0x308;
400+
output_buffer_p[2] = 0x342;
401+
return 3;
402+
}
403+
404+
output_buffer_p[0] = character;
405+
return 1;
406+
} /* lit_char_to_upper_case */

jerry-core/lit/lit-char-helpers.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,4 +220,16 @@ extern uint32_t lit_char_hex_to_int (ecma_char_t);
220220
*/
221221
extern bool lit_char_is_word_char (ecma_char_t);
222222

223+
/*
224+
* Utility functions for uppercasing / lowercasing
225+
*/
226+
227+
/**
228+
* Minimum buffer size for lit_char_to_lower_case / lit_char_to_upper_case functions.
229+
*/
230+
#define LIT_MAXIMUM_OTHER_CASE_LENGTH (3)
231+
232+
lit_utf8_size_t lit_char_to_lower_case (ecma_char_t, ecma_char_t *, size_t);
233+
lit_utf8_size_t lit_char_to_upper_case (ecma_char_t, ecma_char_t *, size_t);
234+
223235
#endif /* LIT_CHAR_HELPERS_H */

jerry-core/lit/lit-strings.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -753,7 +753,7 @@ lit_code_point_to_utf8 (lit_code_point_t code_point, /**< code point */
753753
buf[3] = LIT_UTF8_EXTRA_BYTE_MARKER | fourth_byte_bits;
754754
return 4;
755755
}
756-
} /* lit_code_unit_to_utf8 */
756+
} /* lit_code_point_to_utf8 */
757757

758758
/**
759759
* Convert surrogate pair to code point
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// Copyright 2015 University of Szeged
2+
// Copyright 2015 Samsung Electronics Co., Ltd.
3+
//
4+
// Licensed under the Apache License, Version 2.0 (the "License");
5+
// you may not use this file except in compliance with the License.
6+
// You may obtain a copy of the License at
7+
//
8+
// http://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
16+
// Conversion

assert ("0123456789abcdefghijklmnopqrstuvwxzyABCDEFGHIJKLMNOPQRSTUVWXYZ".toLowerCase()
        == "0123456789abcdefghijklmnopqrstuvwxzyabcdefghijklmnopqrstuvwxyz");
assert ("0123456789abcdefghijklmnopqrstuvwxzyABCDEFGHIJKLMNOPQRSTUVWXYZ".toUpperCase()
        == "0123456789ABCDEFGHIJKLMNOPQRSTUVWXZYABCDEFGHIJKLMNOPQRSTUVWXYZ");

// Special cases: one code unit expands to several code units.

assert ("\u0130".toLowerCase() == "i\u0307");
assert ("\xdf".toUpperCase() == "SS");
assert ("\u1fd7".toUpperCase() == "\u0399\u0308\u0342");

assert ("H\u0130-+".toLowerCase() == "hi\u0307-+");
assert ("\xdf\u1fd7\xdf".toUpperCase() == "SS\u0399\u0308\u0342SS");
assert ("\u0130\u0130\u0130".toLowerCase() == "i\u0307i\u0307i\u0307");

// Empty string stays empty.

assert ("".toLowerCase() == "");
assert ("".toUpperCase() == "");

// The locale variants share the implementation of the plain variants.

assert ("aBc\u0130".toLocaleLowerCase() == "abci\u0307");
assert ("aBc\xdf".toLocaleUpperCase() == "ABCSS");

// Although codepoint 0x10400 and 0x10428 are an upper-lowercase pair,
// we must not do their conversion in JavaScript. We must also ignore
// stray surrogates.

assert ("\ud801\ud801\udc00\udc00".toLowerCase() == "\ud801\ud801\udc00\udc00");
assert ("\ud801\ud801\udc28\udc28".toUpperCase() == "\ud801\ud801\udc28\udc28");

// Conversion of non-string objects.

assert (String.prototype.toUpperCase.call(true) == "TRUE");
assert (String.prototype.toLowerCase.call(-23) == "-23");

var object = { toString : function() { return "<sTr>"; } };
assert (String.prototype.toUpperCase.call(object) == "<STR>");
assert (String.prototype.toLowerCase.call(object) == "<str>");

// Non-coercible this values must throw a TypeError.

try
{
  String.prototype.toUpperCase.call(null);
  assert(false);
}
catch (e)
{
  assert (e instanceof TypeError);
}

try
{
  String.prototype.toLowerCase.call(undefined);
  assert(false);
}
catch (e)
{
  assert (e instanceof TypeError);
}

0 commit comments

Comments
 (0)