Skip to content

Commit de1ffd3

Browse files
authored
Merge pull request #95 from static-frame/94/dta-opt
`delimited_to_arrays` optimizations
2 parents 3866793 + 661a778 commit de1ffd3

File tree

2 files changed

+7
-8
lines changed

2 files changed

+7
-8
lines changed

performance/__main__.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,12 @@
5151

5252
class Perf:
5353
FUNCTIONS = ('main',)
54-
NUMBER = 500_000
54+
NUMBER = 10
5555

5656
class FixtureFileLike:
5757

58-
COUNT_ROW = 100_000
59-
COUNT_COLUMN = 500
60-
NUMBER = 1
58+
COUNT_ROW = 1_000_000
59+
COUNT_COLUMN = 10
6160

6261
def __init__(self):
6362
records_int = [','.join(str(x) for x in range(self.COUNT_COLUMN))] * self.COUNT_ROW

src/_arraykit.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1465,11 +1465,11 @@ AK_CPL_CurrentAdvance(AK_CodePointLine* cpl)
14651465

14661466
//------------------------------------------------------------------------------
14671467
// This treats any case variation of "TRUE" as True and marks everything else as False; this is the same approach genfromtxt takes when the dtype is given as bool. Invalid true or false strings do not cause a failure.
1468-
static inline bool
1468+
static inline npy_int8
14691469
AK_CPL_current_to_bool(AK_CodePointLine* cpl) {
14701470
// must have at least 4 characters
14711471
if (cpl->offsets[cpl->offsets_current_index] < 4) {
1472-
return false;
1472+
return 0;
14731473
}
14741474
Py_UCS4 *p = cpl->buffer_current_ptr;
14751475
Py_UCS4 *end = p + 4; // we must have at least 4 characters for True
@@ -1484,10 +1484,10 @@ AK_CPL_current_to_bool(AK_CodePointLine* cpl) {
14841484
++i;
14851485
}
14861486
else {
1487-
return false;
1487+
return 0;
14881488
}
14891489
}
1490-
return true; //matched all characters
1490+
return 1; //matched all characters
14911491
}
14921492

14931493
// NOTE: using PyOS_strtol was an alternative, but it must be passed a null-terminated char string, which would require copying the data out of the CPL. This approach reads directly from the CPL without copying.

0 commit comments

Comments
 (0)