Skip to content

Commit

Permalink
string and peg replacement functions can now take functions
Browse files Browse the repository at this point in the history
Functions will be invoked with the matched text, and their result will be
coerced to a string and used as the new replacement text.

This also allows passing non-function, non-byteviewable values, which will be
converted into strings during replacement (only once, and only if at least
one match is found).
  • Loading branch information
ianthehenry committed Apr 23, 2023
1 parent d9ed7a7 commit 485099f
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 16 deletions.
19 changes: 13 additions & 6 deletions src/core/peg.c
Original file line number Diff line number Diff line change
Expand Up @@ -1637,7 +1637,7 @@ typedef struct {
JanetPeg *peg;
PegState s;
JanetByteView bytes;
JanetByteView repl;
Janet subst;
int32_t start;
} PegCall;

Expand All @@ -1653,7 +1653,7 @@ static PegCall peg_cfun_init(int32_t argc, Janet *argv, int get_replace) {
ret.peg = compile_peg(argv[0]);
}
if (get_replace) {
ret.repl = janet_getbytes(argv, 1);
ret.subst = argv[1];
ret.bytes = janet_getbytes(argv, 2);
} else {
ret.bytes = janet_getbytes(argv, 1);
Expand Down Expand Up @@ -1738,7 +1738,8 @@ static Janet cfun_peg_replace_generic(int32_t argc, Janet *argv, int only_one) {
trail = i;
}
int32_t nexti = (int32_t)(result - c.bytes.bytes);
janet_buffer_push_bytes(ret, c.repl.bytes, c.repl.len);
JanetByteView subst = janet_text_substitution(&c.subst, c.bytes.bytes + i, nexti - i);
janet_buffer_push_bytes(ret, subst.bytes, subst.len);
trail = nexti;
if (nexti == i) nexti++;
i = nexti;
Expand All @@ -1754,14 +1755,20 @@ static Janet cfun_peg_replace_generic(int32_t argc, Janet *argv, int only_one) {
}

JANET_CORE_FN(cfun_peg_replace_all,
"(peg/replace-all peg repl text &opt start & args)",
"Replace all matches of peg in text with repl, returning a new buffer. The peg does not need to make captures to do replacement.") {
"(peg/replace-all peg subst text &opt start & args)",
"Replace all matches of `peg` in `text` with `subst`, returning a new buffer. "
"The peg does not need to make captures to do replacement. "
"If `subst` is a function, it will be called once for each match "
"and should return the actual replacement text to use.") {
return cfun_peg_replace_generic(argc, argv, 0);
}

JANET_CORE_FN(cfun_peg_replace,
"(peg/replace peg repl text &opt start & args)",
"Replace first match of peg in text with repl, returning a new buffer. The peg does not need to make captures to do replacement. "
"Replace first match of `peg` in `text` with `subst`, returning a new buffer. "
"The peg does not need to make captures to do replacement. "
"If `subst` is a function, it will be called with the matching text, "
"and should return the actual replacement text to use. "
"If no matches are found, returns the input string in a new buffer.") {
return cfun_peg_replace_generic(argc, argv, 1);
}
Expand Down
22 changes: 13 additions & 9 deletions src/core/string.c
Original file line number Diff line number Diff line change
Expand Up @@ -364,14 +364,13 @@ JANET_CORE_FN(cfun_string_findall,

struct replace_state {
struct kmp_state kmp;
const uint8_t *subst;
int32_t substlen;
Janet subst;
};

static void replacesetup(int32_t argc, Janet *argv, struct replace_state *s) {
janet_arity(argc, 3, 4);
JanetByteView pat = janet_getbytes(argv, 0);
JanetByteView subst = janet_getbytes(argv, 1);
Janet subst = argv[1];
JanetByteView text = janet_getbytes(argv, 2);
int32_t start = 0;
if (argc == 4) {
Expand All @@ -380,13 +379,14 @@ static void replacesetup(int32_t argc, Janet *argv, struct replace_state *s) {
}
kmp_init(&s->kmp, text.bytes, text.len, pat.bytes, pat.len);
s->kmp.i = start;
s->subst = subst.bytes;
s->substlen = subst.len;
s->subst = subst;
}

JANET_CORE_FN(cfun_string_replace,
"(string/replace patt subst str)",
"Replace the first occurrence of `patt` with `subst` in the string `str`. "
"If `subst` is a function, it will be called with `patt` only if a match is found, "
"and should return the actual replacement text to use. "
"Will return the new string if `patt` is found, otherwise returns `str`.") {
int32_t result;
struct replace_state s;
Expand All @@ -397,10 +397,11 @@ JANET_CORE_FN(cfun_string_replace,
kmp_deinit(&s.kmp);
return janet_stringv(s.kmp.text, s.kmp.textlen);
}
buf = janet_string_begin(s.kmp.textlen - s.kmp.patlen + s.substlen);
JanetByteView subst = janet_text_substitution(&s.subst, s.kmp.text + result, s.kmp.patlen);
buf = janet_string_begin(s.kmp.textlen - s.kmp.patlen + subst.len);
safe_memcpy(buf, s.kmp.text, result);
safe_memcpy(buf + result, s.subst, s.substlen);
safe_memcpy(buf + result + s.substlen,
safe_memcpy(buf + result, subst.bytes, subst.len);
safe_memcpy(buf + result + subst.len,
s.kmp.text + result + s.kmp.patlen,
s.kmp.textlen - result - s.kmp.patlen);
kmp_deinit(&s.kmp);
Expand All @@ -411,6 +412,8 @@ JANET_CORE_FN(cfun_string_replaceall,
"(string/replace-all patt subst str)",
"Replace all instances of `patt` with `subst` in the string `str`. Overlapping "
"matches will not be counted, only the first match in such a span will be replaced. "
"If `subst` is a function, it will be called with `patt` once for each match, "
"and should return the actual replacement text to use. "
"Will return the new string if `patt` is found, otherwise returns `str`.") {
int32_t result;
struct replace_state s;
Expand All @@ -419,8 +422,9 @@ JANET_CORE_FN(cfun_string_replaceall,
replacesetup(argc, argv, &s);
janet_buffer_init(&b, s.kmp.textlen);
while ((result = kmp_next(&s.kmp)) >= 0) {
JanetByteView subst = janet_text_substitution(&s.subst, s.kmp.text + result, s.kmp.patlen);
janet_buffer_push_bytes(&b, s.kmp.text + lastindex, result - lastindex);
janet_buffer_push_bytes(&b, s.subst, s.substlen);
janet_buffer_push_bytes(&b, subst.bytes, subst.len);
lastindex = result + s.kmp.patlen;
kmp_seti(&s.kmp, lastindex);
}
Expand Down
40 changes: 40 additions & 0 deletions src/core/util.c
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,46 @@ JanetBinding janet_binding_from_entry(Janet entry) {
return binding;
}

/* If the value at the given address can be coerced to a byte view,
return that byte view. If it can't, replace the value at the address
with the result of janet_to_string, and return a byte view over that
string. */
static JanetByteView memoize_byte_view(Janet *value) {
JanetByteView result;
if (!janet_bytes_view(*value, &result.bytes, &result.len)) {
JanetString str = janet_to_string(*value);
*value = janet_wrap_string(str);
result.bytes = str;
result.len = janet_string_length(str);
}
return result;
}

static JanetByteView to_byte_view(Janet value) {
JanetByteView result;
if (!janet_bytes_view(value, &result.bytes, &result.len)) {
JanetString str = janet_to_string(value);
result.bytes = str;
result.len = janet_string_length(str);
}
return result;
}

JanetByteView janet_text_substitution(Janet *subst, const uint8_t *bytes, uint32_t len) {
switch (janet_type(*subst)) {
case JANET_CFUNCTION: {
Janet matched = janet_stringv(bytes, len);
return to_byte_view(janet_unwrap_cfunction(*subst)(1, &matched));
}
case JANET_FUNCTION: {
Janet matched = janet_stringv(bytes, len);
return to_byte_view(janet_call(janet_unwrap_function(*subst), 1, &matched));
}
default:
return memoize_byte_view(subst);
}
}

JanetBinding janet_resolve_ext(JanetTable *env, const uint8_t *sym) {
Janet entry = janet_table_get(env, janet_wrap_symbol(sym));
return janet_binding_from_entry(entry);
Expand Down
1 change: 1 addition & 0 deletions src/core/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ void janet_buffer_format(
Janet *argv);
Janet janet_next_impl(Janet ds, Janet key, int is_interpreter);
JanetBinding janet_binding_from_entry(Janet entry);
JanetByteView janet_text_substitution(Janet *subst, const uint8_t *bytes, uint32_t len);

/* Registry functions */
void janet_registry_put(
Expand Down
4 changes: 4 additions & 0 deletions test/suite0002.janet
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@
(assert (= (string/replace "X" "." "XXX...XXX...XXX") ".XX...XXX...XXX") "string/replace 1")
(assert (= (string/replace-all "X" "." "XXX...XXX...XXX") "...............") "string/replace-all 1")
(assert (= (string/replace-all "XX" "." "XXX...XXX...XXX") ".X....X....X") "string/replace-all 2")
(assert (= (string/replace "xx" string/ascii-upper "xxyxyxyxxxy") "XXyxyxyxxxy") "string/replace function")
(assert (= (string/replace-all "xx" string/ascii-upper "xxyxyxyxxxy") "XXyxyxyXXxy") "string/replace-all function")
(assert (= (string/replace "x" 12 "xyx") "12yx") "string/replace stringable")
(assert (= (string/replace-all "x" 12 "xyx") "12y12") "string/replace-all stringable")
(assert (= (string/ascii-lower "ABCabc&^%!@:;.") "abcabc&^%!@:;.") "string/ascii-lower")
(assert (= (string/ascii-upper "ABCabc&^%!@:;.") "ABCABC&^%!@:;.") "string/ascii-lower")
(assert (= (string/reverse "") "") "string/reverse 1")
Expand Down
9 changes: 8 additions & 1 deletion test/suite0008.janet
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,6 @@ neldb\0\0\0\xD8\x05printG\x01\0\xDE\xDE\xDE'\x03\0marshal_tes/\x02
(assert (deep= (peg/find-all '"/" p) @[0 4 10 14]) "peg find-all")

# Peg replace and replace-all
(var ti 0)
(defn check-replacer
[x y z]
(assert (= (string/replace x y z) (string (peg/replace x y z))) "replacer test replace")
Expand All @@ -339,6 +338,14 @@ neldb\0\0\0\xD8\x05printG\x01\0\xDE\xDE\xDE'\x03\0marshal_tes/\x02
(check-replacer "abc" "Z" "")
(check-replacer "aba" "ZZZZZZ" "ababababababa")
(check-replacer "aba" "" "ababababababa")
(check-replacer "aba" string/ascii-upper "ababababababa")
(check-replacer "aba" 123 "ababababababa")
(assert (= (string (peg/replace-all ~(set "ab") string/ascii-upper "abcaa"))
"ABcAA")
"peg/replace-all cfunction")
(assert (= (string (peg/replace-all ~(set "ab") |$ "abcaa"))
"abcaa")
"peg/replace-all function")

# Peg bug
(assert (deep= @[] (peg/match '(any 1) @"")) "peg empty pattern 1")
Expand Down

0 comments on commit 485099f

Please sign in to comment.