Skip to content

Commit 4c4b2f4

Browse files
committed
Change lpad and rpad function signatures and definitions
1 parent 26b90b0 commit 4c4b2f4

File tree

4 files changed

+67
-153
lines changed

4 files changed

+67
-153
lines changed

cpp/src/gandiva/function_registry_string.cc

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -135,17 +135,19 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
135135
NativeFunction::kNeedsContext),
136136

137137
NativeFunction("lpad", {}, DataTypeVector{utf8(), int32(), utf8()}, utf8(),
138-
kResultNullIfNull, "lpad", NativeFunction::kNeedsContext),
138+
kResultNullIfNull, "lpad_utf8_int32_utf8",
139+
NativeFunction::kNeedsContext),
139140

140141
NativeFunction("lpad", {}, DataTypeVector{utf8(), int32()}, utf8(),
141-
kResultNullIfNull, "lpad_no_fill_text",
142+
kResultNullIfNull, "lpad_utf8_int32",
142143
NativeFunction::kNeedsContext),
143144

144145
NativeFunction("rpad", {}, DataTypeVector{utf8(), int32(), utf8()}, utf8(),
145-
kResultNullIfNull, "rpad", NativeFunction::kNeedsContext),
146+
kResultNullIfNull, "rpad_utf8_int32_utf8",
147+
NativeFunction::kNeedsContext),
146148

147149
NativeFunction("rpad", {}, DataTypeVector{utf8(), int32()}, utf8(),
148-
kResultNullIfNull, "rpad_no_fill_text",
150+
kResultNullIfNull, "rpad_utf8_int32",
149151
NativeFunction::kNeedsContext),
150152

151153
NativeFunction("concatOperator", {}, DataTypeVector{utf8(), utf8()}, utf8(),

cpp/src/gandiva/precompiled/string_ops.cc

Lines changed: 12 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -1423,9 +1423,9 @@ const char* replace_utf8_utf8_utf8(gdv_int64 context, const char* text,
14231423
}
14241424

14251425
FORCE_INLINE
1426-
const char* lpad(gdv_int64 context, const char* text, gdv_int32 text_len,
1427-
gdv_int32 return_length, const char* fill_text, gdv_int32 fill_text_len,
1428-
gdv_int32* out_len) {
1426+
const char* lpad_utf8_int32_utf8(gdv_int64 context, const char* text, gdv_int32 text_len,
1427+
gdv_int32 return_length, const char* fill_text,
1428+
gdv_int32 fill_text_len, gdv_int32* out_len) {
14291429
// if the text length or the defined return length (number of characters to return)
14301430
// is <=0, then return an empty string.
14311431
if (text_len == 0 || return_length <= 0) {
@@ -1490,9 +1490,9 @@ const char* lpad(gdv_int64 context, const char* text, gdv_int32 text_len,
14901490
}
14911491

14921492
FORCE_INLINE
1493-
const char* rpad(gdv_int64 context, const char* text, gdv_int32 text_len,
1494-
gdv_int32 return_length, const char* fill_text, gdv_int32 fill_text_len,
1495-
gdv_int32* out_len) {
1493+
const char* rpad_utf8_int32_utf8(gdv_int64 context, const char* text, gdv_int32 text_len,
1494+
gdv_int32 return_length, const char* fill_text,
1495+
gdv_int32 fill_text_len, gdv_int32* out_len) {
14961496
// if the text length or the defined return length (number of characters to return)
14971497
// is <=0, then return an empty string.
14981498
if (text_len == 0 || return_length <= 0) {
@@ -1556,103 +1556,15 @@ const char* rpad(gdv_int64 context, const char* text, gdv_int32 text_len,
15561556
}
15571557

15581558
FORCE_INLINE
1559-
const char* lpad_no_fill_text(gdv_int64 context, const char* text, gdv_int32 text_len,
1560-
gdv_int32 return_length, gdv_int32* out_len) {
1561-
// if the text length or the defined return length (number of characters to return)
1562-
// is <=0, then return an empty string.
1563-
if (text_len == 0 || return_length <= 0) {
1564-
*out_len = 0;
1565-
return "";
1566-
}
1567-
1568-
// initially counts the number of utf8 characters in the defined text and fill_text
1569-
int32_t text_char_count = utf8_length(context, text, text_len);
1570-
// text_char_count is zero if input has invalid utf8 char
1571-
// fill_char_count is zero if fill_text_len is > 0 and its value has invalid utf8 char
1572-
if (text_char_count == 0) {
1573-
*out_len = 0;
1574-
return "";
1575-
}
1576-
1577-
if (return_length == text_char_count) {
1578-
// case where the return length is same as the text's length, or if it need to
1579-
// fill into text but "fill_text" is empty, then return text directly.
1580-
*out_len = text_len;
1581-
return text;
1582-
} else if (return_length < text_char_count) {
1583-
// case where it truncates the result on return length.
1584-
*out_len = utf8_byte_pos(context, text, text_len, return_length);
1585-
return text;
1586-
} else {
1587-
// case (return_length > text_char_count)
1588-
// case where it needs to copy "fill_text" on the string left. The total number
1589-
// of chars to copy is given by (return_length - text_char_count)
1590-
char* ret = reinterpret_cast<gdv_binary>(gdv_fn_context_arena_malloc(
1591-
context, text_len + (return_length - text_char_count)));
1592-
if (ret == nullptr) {
1593-
gdv_fn_context_set_error_msg(context,
1594-
"Could not allocate memory for output string");
1595-
*out_len = 0;
1596-
return "";
1597-
}
1598-
const char* blank_space = " ";
1599-
for (int i = 0; i < return_length - text_char_count; ++i) {
1600-
ret[i] = blank_space[0];
1601-
}
1602-
memcpy(ret + return_length - text_char_count, text, text_len);
1603-
*out_len = text_len + (return_length - text_char_count);
1604-
return ret;
1605-
}
1559+
const char* lpad_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
1560+
gdv_int32 return_length, gdv_int32* out_len) {
1561+
return lpad_utf8_int32_utf8(context, text, text_len, return_length, " ", 1, out_len);
16061562
}
16071563

16081564
FORCE_INLINE
1609-
const char* rpad_no_fill_text(gdv_int64 context, const char* text, gdv_int32 text_len,
1610-
gdv_int32 return_length, gdv_int32* out_len) {
1611-
// if the text length or the defined return length (number of characters to return)
1612-
// is <=0, then return an empty string.
1613-
if (text_len == 0 || return_length <= 0) {
1614-
*out_len = 0;
1615-
return "";
1616-
}
1617-
1618-
// initially counts the number of utf8 characters in the defined text and fill_text
1619-
int32_t text_char_count = utf8_length(context, text, text_len);
1620-
// text_char_count is zero if input has invalid utf8 char
1621-
// fill_char_count is zero if fill_text_len is > 0 and its value has invalid utf8 char
1622-
if (text_char_count == 0) {
1623-
*out_len = 0;
1624-
return "";
1625-
}
1626-
1627-
if (return_length == text_char_count) {
1628-
// case where the return length is same as the text's length, or if it need to
1629-
// fill into text but "fill_text" is empty, then return text directly.
1630-
*out_len = text_len;
1631-
return text;
1632-
} else if (return_length < text_char_count) {
1633-
// case where it truncates the result on return length.
1634-
*out_len = utf8_byte_pos(context, text, text_len, return_length);
1635-
return text;
1636-
} else {
1637-
// case (return_length > text_char_count)
1638-
// case where it needs to copy "fill_text" on the string right
1639-
char* ret = reinterpret_cast<gdv_binary>(gdv_fn_context_arena_malloc(
1640-
context, text_len + (return_length - text_char_count)));
1641-
if (ret == nullptr) {
1642-
gdv_fn_context_set_error_msg(context,
1643-
"Could not allocate memory for output string");
1644-
*out_len = 0;
1645-
return "";
1646-
}
1647-
// fulfill the initial text copying the main string input
1648-
memcpy(ret, text, text_len);
1649-
const char* blank_space = " ";
1650-
for (int i = 0; i < return_length - text_char_count; ++i) {
1651-
ret[text_len + i] = blank_space[0];
1652-
}
1653-
*out_len = text_len + (return_length - text_char_count);
1654-
return ret;
1655-
}
1565+
const char* rpad_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
1566+
gdv_int32 return_length, gdv_int32* out_len) {
1567+
return rpad_utf8_int32_utf8(context, text, text_len, return_length, " ", 1, out_len);
16561568
}
16571569

16581570
FORCE_INLINE

cpp/src/gandiva/precompiled/string_ops_test.cc

Lines changed: 40 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -703,65 +703,65 @@ TEST(TestStringOps, TestLpadString) {
703703
const char* out_str;
704704

705705
// LPAD function tests - with defined fill pad text
706-
out_str = lpad(ctx_ptr, "TestString", 10, 4, "fill", 4, &out_len);
706+
out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 4, "fill", 4, &out_len);
707707
EXPECT_EQ(std::string(out_str, out_len), "Test");
708708

709-
out_str = lpad(ctx_ptr, "TestString", 10, 10, "fill", 4, &out_len);
709+
out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 10, "fill", 4, &out_len);
710710
EXPECT_EQ(std::string(out_str, out_len), "TestString");
711711

712-
out_str = lpad(ctx_ptr, "TestString", 0, 10, "fill", 4, &out_len);
712+
out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 0, 10, "fill", 4, &out_len);
713713
EXPECT_EQ(std::string(out_str, out_len), "");
714714

715-
out_str = lpad(ctx_ptr, "TestString", 10, 0, "fill", 4, &out_len);
715+
out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 0, "fill", 4, &out_len);
716716
EXPECT_EQ(std::string(out_str, out_len), "");
717717

718-
out_str = lpad(ctx_ptr, "TestString", 10, -500, "fill", 4, &out_len);
718+
out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, -500, "fill", 4, &out_len);
719719
EXPECT_EQ(std::string(out_str, out_len), "");
720720

721-
out_str = lpad(ctx_ptr, "TestString", 10, 500, "", 0, &out_len);
721+
out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 500, "", 0, &out_len);
722722
EXPECT_EQ(std::string(out_str, out_len), "TestString");
723723

724-
out_str = lpad(ctx_ptr, "TestString", 10, 18, "Fill", 4, &out_len);
724+
out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 18, "Fill", 4, &out_len);
725725
EXPECT_EQ(std::string(out_str, out_len), "FillFillTestString");
726726

727-
out_str = lpad(ctx_ptr, "TestString", 10, 15, "Fill", 4, &out_len);
727+
out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 15, "Fill", 4, &out_len);
728728
EXPECT_EQ(std::string(out_str, out_len), "FillFTestString");
729729

730-
out_str = lpad(ctx_ptr, "TestString", 10, 20, "Fill", 4, &out_len);
730+
out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 20, "Fill", 4, &out_len);
731731
EXPECT_EQ(std::string(out_str, out_len), "FillFillFiTestString");
732732

733-
out_str = lpad(ctx_ptr, "абвгд", 10, 7, "д", 2, &out_len);
733+
out_str = lpad_utf8_int32_utf8(ctx_ptr, "абвгд", 10, 7, "д", 2, &out_len);
734734
EXPECT_EQ(std::string(out_str, out_len), "ддабвгд");
735735

736-
out_str = lpad(ctx_ptr, "абвгд", 10, 20, "абвгд", 10, &out_len);
736+
out_str = lpad_utf8_int32_utf8(ctx_ptr, "абвгд", 10, 20, "абвгд", 10, &out_len);
737737
EXPECT_EQ(std::string(out_str, out_len), "абвгдабвгдабвгдабвгд");
738738

739-
out_str = lpad(ctx_ptr, "hello", 5, 6, "д", 2, &out_len);
739+
out_str = lpad_utf8_int32_utf8(ctx_ptr, "hello", 5, 6, "д", 2, &out_len);
740740
EXPECT_EQ(std::string(out_str, out_len), "дhello");
741741

742742
// LPAD function tests - with NO pad text
743-
out_str = lpad_no_fill_text(ctx_ptr, "TestString", 10, 4, &out_len);
743+
out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 4, &out_len);
744744
EXPECT_EQ(std::string(out_str, out_len), "Test");
745745

746-
out_str = lpad_no_fill_text(ctx_ptr, "TestString", 10, 10, &out_len);
746+
out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 10, &out_len);
747747
EXPECT_EQ(std::string(out_str, out_len), "TestString");
748748

749-
out_str = lpad_no_fill_text(ctx_ptr, "TestString", 0, 10, &out_len);
749+
out_str = lpad_utf8_int32(ctx_ptr, "TestString", 0, 10, &out_len);
750750
EXPECT_EQ(std::string(out_str, out_len), "");
751751

752-
out_str = lpad_no_fill_text(ctx_ptr, "TestString", 10, 0, &out_len);
752+
out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 0, &out_len);
753753
EXPECT_EQ(std::string(out_str, out_len), "");
754754

755-
out_str = lpad_no_fill_text(ctx_ptr, "TestString", 10, -500, &out_len);
755+
out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, -500, &out_len);
756756
EXPECT_EQ(std::string(out_str, out_len), "");
757757

758-
out_str = lpad_no_fill_text(ctx_ptr, "TestString", 10, 18, &out_len);
758+
out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 18, &out_len);
759759
EXPECT_EQ(std::string(out_str, out_len), " TestString");
760760

761-
out_str = lpad_no_fill_text(ctx_ptr, "TestString", 10, 15, &out_len);
761+
out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 15, &out_len);
762762
EXPECT_EQ(std::string(out_str, out_len), " TestString");
763763

764-
out_str = lpad_no_fill_text(ctx_ptr, "абвгд", 10, 7, &out_len);
764+
out_str = lpad_utf8_int32(ctx_ptr, "абвгд", 10, 7, &out_len);
765765
EXPECT_EQ(std::string(out_str, out_len), " абвгд");
766766
}
767767

@@ -772,65 +772,65 @@ TEST(TestStringOps, TestRpadString) {
772772
const char* out_str;
773773

774774
// RPAD function tests - with defined fill pad text
775-
out_str = rpad(ctx_ptr, "TestString", 10, 4, "fill", 4, &out_len);
775+
out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 4, "fill", 4, &out_len);
776776
EXPECT_EQ(std::string(out_str, out_len), "Test");
777777

778-
out_str = rpad(ctx_ptr, "TestString", 10, 10, "fill", 4, &out_len);
778+
out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 10, "fill", 4, &out_len);
779779
EXPECT_EQ(std::string(out_str, out_len), "TestString");
780780

781-
out_str = rpad(ctx_ptr, "TestString", 0, 10, "fill", 4, &out_len);
781+
out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 0, 10, "fill", 4, &out_len);
782782
EXPECT_EQ(std::string(out_str, out_len), "");
783783

784-
out_str = rpad(ctx_ptr, "TestString", 10, 0, "fill", 4, &out_len);
784+
out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 0, "fill", 4, &out_len);
785785
EXPECT_EQ(std::string(out_str, out_len), "");
786786

787-
out_str = rpad(ctx_ptr, "TestString", 10, -500, "fill", 4, &out_len);
787+
out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, -500, "fill", 4, &out_len);
788788
EXPECT_EQ(std::string(out_str, out_len), "");
789789

790-
out_str = rpad(ctx_ptr, "TestString", 10, 500, "", 0, &out_len);
790+
out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 500, "", 0, &out_len);
791791
EXPECT_EQ(std::string(out_str, out_len), "TestString");
792792

793-
out_str = rpad(ctx_ptr, "TestString", 10, 18, "Fill", 4, &out_len);
793+
out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 18, "Fill", 4, &out_len);
794794
EXPECT_EQ(std::string(out_str, out_len), "TestStringFillFill");
795795

796-
out_str = rpad(ctx_ptr, "TestString", 10, 15, "Fill", 4, &out_len);
796+
out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 15, "Fill", 4, &out_len);
797797
EXPECT_EQ(std::string(out_str, out_len), "TestStringFillF");
798798

799-
out_str = rpad(ctx_ptr, "TestString", 10, 20, "Fill", 4, &out_len);
799+
out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 20, "Fill", 4, &out_len);
800800
EXPECT_EQ(std::string(out_str, out_len), "TestStringFillFillFi");
801801

802-
out_str = rpad(ctx_ptr, "абвгд", 10, 7, "д", 2, &out_len);
802+
out_str = rpad_utf8_int32_utf8(ctx_ptr, "абвгд", 10, 7, "д", 2, &out_len);
803803
EXPECT_EQ(std::string(out_str, out_len), "абвгддд");
804804

805-
out_str = rpad(ctx_ptr, "абвгд", 10, 20, "абвгд", 10, &out_len);
805+
out_str = rpad_utf8_int32_utf8(ctx_ptr, "абвгд", 10, 20, "абвгд", 10, &out_len);
806806
EXPECT_EQ(std::string(out_str, out_len), "абвгдабвгдабвгдабвгд");
807807

808-
out_str = rpad(ctx_ptr, "hello", 5, 6, "д", 2, &out_len);
808+
out_str = rpad_utf8_int32_utf8(ctx_ptr, "hello", 5, 6, "д", 2, &out_len);
809809
EXPECT_EQ(std::string(out_str, out_len), "helloд");
810810

811811
// RPAD function tests - with NO pad text
812-
out_str = rpad_no_fill_text(ctx_ptr, "TestString", 10, 4, &out_len);
812+
out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 4, &out_len);
813813
EXPECT_EQ(std::string(out_str, out_len), "Test");
814814

815-
out_str = rpad_no_fill_text(ctx_ptr, "TestString", 10, 10, &out_len);
815+
out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 10, &out_len);
816816
EXPECT_EQ(std::string(out_str, out_len), "TestString");
817817

818-
out_str = rpad_no_fill_text(ctx_ptr, "TestString", 0, 10, &out_len);
818+
out_str = rpad_utf8_int32(ctx_ptr, "TestString", 0, 10, &out_len);
819819
EXPECT_EQ(std::string(out_str, out_len), "");
820820

821-
out_str = rpad_no_fill_text(ctx_ptr, "TestString", 10, 0, &out_len);
821+
out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 0, &out_len);
822822
EXPECT_EQ(std::string(out_str, out_len), "");
823823

824-
out_str = rpad_no_fill_text(ctx_ptr, "TestString", 10, -500, &out_len);
824+
out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, -500, &out_len);
825825
EXPECT_EQ(std::string(out_str, out_len), "");
826826

827-
out_str = rpad_no_fill_text(ctx_ptr, "TestString", 10, 18, &out_len);
827+
out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 18, &out_len);
828828
EXPECT_EQ(std::string(out_str, out_len), "TestString ");
829829

830-
out_str = rpad_no_fill_text(ctx_ptr, "TestString", 10, 15, &out_len);
830+
out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 15, &out_len);
831831
EXPECT_EQ(std::string(out_str, out_len), "TestString ");
832832

833-
out_str = rpad_no_fill_text(ctx_ptr, "абвгд", 10, 7, &out_len);
833+
out_str = rpad_utf8_int32(ctx_ptr, "абвгд", 10, 7, &out_len);
834834
EXPECT_EQ(std::string(out_str, out_len), "абвгд ");
835835
}
836836

cpp/src/gandiva/precompiled/types.h

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -407,18 +407,18 @@ gdv_int32 locate_utf8_utf8_int32(gdv_int64 context, const char* sub_str,
407407
gdv_int32 sub_str_len, const char* str,
408408
gdv_int32 str_len, gdv_int32 start_pos);
409409

410-
const char* lpad(gdv_int64 context, const char* text, gdv_int32 text_len,
411-
gdv_int32 return_length, const char* fill_text, gdv_int32 fill_text_len,
412-
gdv_int32* out_len);
410+
const char* lpad_utf8_int32_utf8(gdv_int64 context, const char* text, gdv_int32 text_len,
411+
gdv_int32 return_length, const char* fill_text,
412+
gdv_int32 fill_text_len, gdv_int32* out_len);
413413

414-
const char* rpad(gdv_int64 context, const char* text, gdv_int32 text_len,
415-
gdv_int32 return_length, const char* fill_text, gdv_int32 fill_text_len,
416-
gdv_int32* out_len);
414+
const char* rpad_utf8_int32_utf8(gdv_int64 context, const char* text, gdv_int32 text_len,
415+
gdv_int32 return_length, const char* fill_text,
416+
gdv_int32 fill_text_len, gdv_int32* out_len);
417417

418-
const char* lpad_no_fill_text(gdv_int64 context, const char* text, gdv_int32 text_len,
419-
gdv_int32 return_length, gdv_int32* out_len);
418+
const char* lpad_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
419+
gdv_int32 return_length, gdv_int32* out_len);
420420

421-
const char* rpad_no_fill_text(gdv_int64 context, const char* text, gdv_int32 text_len,
421+
const char* rpad_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
422422
gdv_int32 return_length, gdv_int32* out_len);
423423

424424
const char* replace_with_max_len_utf8_utf8_utf8(gdv_int64 context, const char* text,

0 commit comments

Comments
 (0)