Skip to content

Commit 585cad3

Browse files
committed
Add base implementation and tests for LPAD function without pad texts considering string input values
1 parent 73927fc commit 585cad3

File tree

3 files changed

+81
-0
lines changed

3 files changed

+81
-0
lines changed

cpp/src/gandiva/precompiled/string_ops.cc

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1489,6 +1489,58 @@ const char* lpad(gdv_int64 context, const char* text, gdv_int32 text_len,
14891489
}
14901490
}
14911491

1492+
// Left-pads `text` with blank spaces (' ') so that the result has `return_length`
// UTF-8 characters. This is the LPAD variant that takes no explicit fill text.
//
// context       - execution context handle, used for arena allocation and for
//                 reporting errors.
// text          - input bytes (expected to be UTF-8); not required to be
//                 null-terminated.
// text_len      - length of `text` in bytes.
// return_length - desired length of the result, in characters.
// out_len       - receives the length of the returned string in bytes.
//
// Returns:
//   - ""                    when text_len <= 0, return_length <= 0, the character
//                           count of `text` is reported as zero, the padded size
//                           would not fit in a 32-bit length, or allocation fails;
//   - `text` itself         when it already has `return_length` characters, or a
//                           prefix of it (same pointer, shorter *out_len) when it
//                           is longer than `return_length`;
//   - an arena-allocated    buffer holding the leading blanks followed by `text`
//                           otherwise.
FORCE_INLINE
const char* lpad_no_fill_text(gdv_int64 context, const char* text, gdv_int32 text_len,
                              gdv_int32 return_length, gdv_int32* out_len) {
  // An empty (or invalid, negative-length) text, or a non-positive requested
  // length, always produces an empty string.
  if (text_len <= 0 || return_length <= 0) {
    *out_len = 0;
    return "";
  }

  // Number of UTF-8 characters (not bytes) in the input. A zero count for a
  // non-empty input is treated as "nothing usable to pad".
  // NOTE(review): presumably utf8_length returns 0 on invalid UTF-8 - confirm.
  const int32_t text_char_count = utf8_length(context, text, text_len);
  if (text_char_count == 0) {
    *out_len = 0;
    return "";
  }

  if (return_length == text_char_count) {
    // Already the requested length: hand the input back untouched.
    *out_len = text_len;
    return text;
  }

  if (return_length < text_char_count) {
    // Longer than requested: keep only the first `return_length` characters by
    // shortening the reported byte length to where that character range ends.
    *out_len = utf8_byte_pos(context, text, text_len, return_length);
    return text;
  }

  // Shorter than requested: prepend one blank byte per missing character.
  // pad_count cannot overflow because 0 < text_char_count < return_length.
  const int32_t pad_count = return_length - text_char_count;

  // The output byte size is computed in 64 bits so that a huge return_length
  // combined with a large multi-byte text cannot overflow the 32-bit length.
  const int64_t total_len = static_cast<int64_t>(text_len) + pad_count;
  if (total_len > INT32_MAX) {
    gdv_fn_context_set_error_msg(context, "Would overflow maximum output size");
    *out_len = 0;
    return "";
  }
  const gdv_int32 result_len = static_cast<gdv_int32>(total_len);

  char* ret =
      reinterpret_cast<gdv_binary>(gdv_fn_context_arena_malloc(context, result_len));
  if (ret == nullptr) {
    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
    *out_len = 0;
    return "";
  }

  // Leading blanks first, then the original bytes right after them.
  memset(ret, ' ', pad_count);
  memcpy(ret + pad_count, text, text_len);
  *out_len = result_len;
  return ret;
}
1543+
14921544
FORCE_INLINE
14931545
const char* split_part(gdv_int64 context, const char* text, gdv_int32 text_len,
14941546
const char* delimiter, gdv_int32 delim_len, gdv_int32 index,

cpp/src/gandiva/precompiled/string_ops_test.cc

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -702,6 +702,7 @@ TEST(TestStringOps, TestLpadString) {
702702
gdv_int32 out_len = 0;
703703
const char* out_str;
704704

705+
// LPAD function tests - with defined fill pad text
705706
out_str = lpad(ctx_ptr, "TestString", 10, 4, "fill", 4, &out_len);
706707
EXPECT_EQ(std::string(out_str, out_len), "Test");
707708

@@ -737,6 +738,31 @@ TEST(TestStringOps, TestLpadString) {
737738

738739
out_str = lpad(ctx_ptr, "hello", 5, 6, "д", 2, &out_len);
739740
EXPECT_EQ(std::string(out_str, out_len), "дhello");
741+
742+
// LPAD function tests - with NO pad text
743+
out_str = lpad_no_fill_text(ctx_ptr, "TestString", 10, 4, &out_len);
744+
EXPECT_EQ(std::string(out_str, out_len), "Test");
745+
746+
out_str = lpad_no_fill_text(ctx_ptr, "TestString", 10, 10, &out_len);
747+
EXPECT_EQ(std::string(out_str, out_len), "TestString");
748+
749+
out_str = lpad_no_fill_text(ctx_ptr, "TestString", 0, 10, &out_len);
750+
EXPECT_EQ(std::string(out_str, out_len), "");
751+
752+
out_str = lpad_no_fill_text(ctx_ptr, "TestString", 10, 0,&out_len);
753+
EXPECT_EQ(std::string(out_str, out_len), "");
754+
755+
out_str = lpad_no_fill_text(ctx_ptr, "TestString", 10, -500, &out_len);
756+
EXPECT_EQ(std::string(out_str, out_len), "");
757+
758+
out_str = lpad_no_fill_text(ctx_ptr, "TestString", 10, 18, &out_len);
759+
EXPECT_EQ(std::string(out_str, out_len), " TestString");
760+
761+
out_str = lpad_no_fill_text(ctx_ptr, "TestString", 10, 15, &out_len);
762+
EXPECT_EQ(std::string(out_str, out_len), " TestString");
763+
764+
out_str = lpad_no_fill_text(ctx_ptr, "абвгд", 10, 7, &out_len);
765+
EXPECT_EQ(std::string(out_str, out_len), " абвгд");
740766
}
741767

742768
TEST(TestStringOps, TestRtrim) {

cpp/src/gandiva/precompiled/types.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,9 @@ const char* lpad(gdv_int64 context, const char* text, gdv_int32 text_len,
411411
gdv_int32 return_length, const char* fill_text, gdv_int32 fill_text_len,
412412
gdv_int32* out_len);
413413

414+
// LPAD variant that takes no fill text. `text`/`text_len` give the input and its
// length, `return_length` is the requested result length, and the length of the
// returned string is written to `*out_len`.
const char* lpad_no_fill_text(gdv_int64 context, const char* text, gdv_int32 text_len,
415+
gdv_int32 return_length, gdv_int32* out_len);
416+
414417
const char* replace_with_max_len_utf8_utf8_utf8(gdv_int64 context, const char* text,
415418
gdv_int32 text_len, const char* from_str,
416419
gdv_int32 from_str_len,

0 commit comments

Comments
 (0)