Skip to content

Commit 88479bd

Browse files
CEL Dev Teamcopybara-github
CEL Dev Team
authored andcommitted
Adding the CEL regex extensions
PiperOrigin-RevId: 764892633
1 parent 051218a commit 88479bd

File tree

5 files changed

+539
-2
lines changed

5 files changed

+539
-2
lines changed

extensions/src/main/java/dev/cel/extensions/BUILD.bazel

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ java_library(
2222
":lists",
2323
":math",
2424
":protos",
25+
":regex",
2526
":sets",
2627
":sets_function",
2728
":strings",
@@ -241,3 +242,19 @@ java_library(
241242
"@maven//:com_google_guava_guava",
242243
],
243244
)
245+
246+
# CEL regex extension library: builds CelRegexExtensions.java, which relies on RE2/J for matching.
java_library(
247+
name = "regex",
248+
srcs = ["CelRegexExtensions.java"],
249+
deps = [
250+
"//checker:checker_builder",
251+
"//common:compiler_common",
252+
"//common/types",
253+
"//compiler:compiler_builder",
254+
"//runtime",
255+
"//runtime:function_binding",
256+
"@maven//:com_google_errorprone_error_prone_annotations",
257+
"@maven//:com_google_guava_guava",
258+
"@maven//:com_google_re2j_re2j",
259+
],
260+
)

extensions/src/main/java/dev/cel/extensions/CelExtensions.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ public final class CelExtensions {
3636
private static final CelBindingsExtensions BINDINGS_EXTENSIONS = new CelBindingsExtensions();
3737
private static final CelEncoderExtensions ENCODER_EXTENSIONS = new CelEncoderExtensions();
3838
private static final CelListsExtensions LISTS_EXTENSIONS_ALL = new CelListsExtensions();
39+
private static final CelRegexExtensions REGEX_EXTENSIONS = new CelRegexExtensions();
3940

4041
/**
4142
* Extended functions for string manipulation.
@@ -248,6 +249,18 @@ public static CelListsExtensions lists(Set<CelListsExtensions.Function> function
248249
return new CelListsExtensions(functions);
249250
}
250251

252+
/**
253+
* Extended functions for Regular Expressions.
254+
*
255+
* <p>Refer to README.md for available functions.
256+
*
257+
* <p>This will include all functions denoted in {@link CelRegexExtensions.Function}, including
258+
* any future additions.
*
* @return the shared {@link CelRegexExtensions} instance held in {@code REGEX_EXTENSIONS}
259+
*/
260+
public static CelRegexExtensions regex() {
261+
return REGEX_EXTENSIONS;
262+
}
263+
251264
/**
252265
* Retrieves all function names used by every extension library.
253266
*
@@ -265,7 +278,9 @@ public static ImmutableSet<String> getAllFunctionNames() {
265278
stream(CelEncoderExtensions.Function.values())
266279
.map(CelEncoderExtensions.Function::getFunction),
267280
stream(CelListsExtensions.Function.values())
268-
.map(CelListsExtensions.Function::getFunction))
281+
.map(CelListsExtensions.Function::getFunction),
282+
stream(CelRegexExtensions.Function.values())
283+
.map(CelRegexExtensions.Function::getFunction))
269284
.collect(toImmutableSet());
270285
}
271286

Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package dev.cel.extensions;
16+
17+
import com.google.common.collect.ImmutableList;
18+
import com.google.common.collect.ImmutableMap;
19+
import com.google.common.collect.ImmutableSet;
20+
import com.google.errorprone.annotations.Immutable;
21+
import com.google.re2j.Matcher;
22+
import com.google.re2j.Pattern;
23+
import com.google.re2j.PatternSyntaxException;
24+
import dev.cel.checker.CelCheckerBuilder;
25+
import dev.cel.common.CelFunctionDecl;
26+
import dev.cel.common.CelOverloadDecl;
27+
import dev.cel.common.types.ListType;
28+
import dev.cel.common.types.MapType;
29+
import dev.cel.common.types.OptionalType;
30+
import dev.cel.common.types.SimpleType;
31+
import dev.cel.compiler.CelCompilerLibrary;
32+
import dev.cel.runtime.CelFunctionBinding;
33+
import dev.cel.runtime.CelRuntimeBuilder;
34+
import dev.cel.runtime.CelRuntimeLibrary;
35+
import java.util.Optional;
36+
import java.util.Set;
37+
38+
/** Internal implementation of CEL regex extensions. */
39+
@Immutable
40+
final class CelRegexExtensions implements CelCompilerLibrary, CelRuntimeLibrary {
41+
42+
private static final String REGEX_REPLACE_FUNCTION = "regex.replace";
43+
private static final String REGEX_CAPTURE_FUNCTION = "regex.capture";
44+
private static final String REGEX_CAPTUREALL_FUNCTION = "regex.captureAll";
45+
private static final String REGEX_CAPTUREALLNAMED_FUNCTION = "regex.captureAllNamed";
46+
47+
enum Function {
48+
REPLACE(
49+
CelFunctionDecl.newFunctionDeclaration(
50+
REGEX_REPLACE_FUNCTION,
51+
CelOverloadDecl.newGlobalOverload(
52+
"regex_replaceAll_string_string_string",
53+
"Replaces all the matched values using the given replace string.",
54+
SimpleType.STRING,
55+
SimpleType.STRING,
56+
SimpleType.STRING,
57+
SimpleType.STRING),
58+
CelOverloadDecl.newGlobalOverload(
59+
"regex_replaceCount_string_string_string_int",
60+
"Replaces the given number of matched values using the given replace string.",
61+
SimpleType.STRING,
62+
SimpleType.STRING,
63+
SimpleType.STRING,
64+
SimpleType.STRING,
65+
SimpleType.INT)),
66+
ImmutableSet.of(
67+
CelFunctionBinding.from(
68+
"regex_replaceAll_string_string_string",
69+
ImmutableList.of(String.class, String.class, String.class),
70+
(args) -> {
71+
String target = (String) args[0];
72+
String pattern = (String) args[1];
73+
String replaceStr = (String) args[2];
74+
return CelRegexExtensions.replace(target, pattern, replaceStr);
75+
}),
76+
CelFunctionBinding.from(
77+
"regex_replaceCount_string_string_string_int",
78+
ImmutableList.of(String.class, String.class, String.class, Long.class),
79+
(args) -> {
80+
String target = (String) args[0];
81+
String pattern = (String) args[1];
82+
String replaceStr = (String) args[2];
83+
long count = (long) args[3];
84+
return CelRegexExtensions.replace(target, pattern, replaceStr, count);
85+
}))),
86+
CAPTURE(
87+
CelFunctionDecl.newFunctionDeclaration(
88+
REGEX_CAPTURE_FUNCTION,
89+
CelOverloadDecl.newGlobalOverload(
90+
"regex_capture_string_string",
91+
"Returns the first substring that matches the regex.",
92+
OptionalType.create(SimpleType.STRING),
93+
SimpleType.STRING,
94+
SimpleType.STRING)),
95+
ImmutableSet.of(
96+
CelFunctionBinding.from(
97+
"regex_capture_string_string",
98+
String.class,
99+
String.class,
100+
CelRegexExtensions::captureFirstMatch))),
101+
CAPTUREALL(
102+
CelFunctionDecl.newFunctionDeclaration(
103+
REGEX_CAPTUREALL_FUNCTION,
104+
CelOverloadDecl.newGlobalOverload(
105+
"regex_captureAll_string_string",
106+
"Returns an arrat of all substrings that match the regex.",
107+
ListType.create(SimpleType.STRING),
108+
SimpleType.STRING,
109+
SimpleType.STRING)),
110+
ImmutableSet.of(
111+
CelFunctionBinding.from(
112+
"regex_captureAll_string_string",
113+
String.class,
114+
String.class,
115+
CelRegexExtensions::captureAllMatches))),
116+
CAPTUREALLNAMED(
117+
CelFunctionDecl.newFunctionDeclaration(
118+
REGEX_CAPTUREALLNAMED_FUNCTION,
119+
CelOverloadDecl.newGlobalOverload(
120+
"regex_captureAllNamed_string_string",
121+
"Returns a map of all named captured groups as <named_group_name, captured_string>."
122+
+ " Ignores the unnamed capture groups.",
123+
MapType.create(SimpleType.STRING, SimpleType.STRING),
124+
SimpleType.STRING,
125+
SimpleType.STRING)),
126+
ImmutableSet.of(
127+
CelFunctionBinding.from(
128+
"regex_captureAllNamed_string_string",
129+
String.class,
130+
String.class,
131+
CelRegexExtensions::captureAllNamedGroups)));
132+
133+
private final CelFunctionDecl functionDecl;
134+
private final ImmutableSet<CelFunctionBinding> functionBindings;
135+
136+
String getFunction() {
137+
return functionDecl.name();
138+
}
139+
140+
Function(CelFunctionDecl functionDecl, ImmutableSet<CelFunctionBinding> functionBindings) {
141+
this.functionDecl = functionDecl;
142+
this.functionBindings = functionBindings;
143+
}
144+
}
145+
146+
private final ImmutableSet<Function> functions;
147+
148+
CelRegexExtensions() {
149+
this.functions = ImmutableSet.copyOf(Function.values());
150+
}
151+
152+
CelRegexExtensions(Set<Function> functions) {
153+
this.functions = ImmutableSet.copyOf(functions);
154+
}
155+
156+
@Override
157+
public void setCheckerOptions(CelCheckerBuilder checkerBuilder) {
158+
functions.forEach(function -> checkerBuilder.addFunctionDeclarations(function.functionDecl));
159+
}
160+
161+
@Override
162+
public void setRuntimeOptions(CelRuntimeBuilder runtimeBuilder) {
163+
functions.forEach(function -> runtimeBuilder.addFunctionBindings(function.functionBindings));
164+
}
165+
166+
private static Pattern compileRegexPattern(String regex) {
167+
try {
168+
return Pattern.compile(regex);
169+
} catch (PatternSyntaxException e) {
170+
throw new IllegalArgumentException("Failed to compile regex: " + regex, e);
171+
}
172+
}
173+
174+
private static String replace(String target, String regex, String replaceStr) {
175+
Pattern pattern = compileRegexPattern(regex);
176+
Matcher matcher = pattern.matcher(target);
177+
return matcher.replaceAll(replaceStr);
178+
}
179+
180+
private static String replace(String target, String regex, String replaceStr, long replaceCount) {
181+
Pattern pattern = compileRegexPattern(regex);
182+
183+
if (replaceCount == 0) {
184+
return target;
185+
}
186+
187+
Matcher matcher = pattern.matcher(target);
188+
StringBuffer sb = new StringBuffer();
189+
int counter = 0;
190+
191+
while (matcher.find()) {
192+
if (replaceCount != -1 && counter >= replaceCount) {
193+
break;
194+
}
195+
matcher.appendReplacement(sb, replaceStr);
196+
counter++;
197+
}
198+
matcher.appendTail(sb);
199+
200+
return sb.toString();
201+
}
202+
203+
private static Optional<String> captureFirstMatch(String target, String regex) {
204+
Pattern pattern = compileRegexPattern(regex);
205+
Matcher matcher = pattern.matcher(target);
206+
207+
if (matcher.find()) {
208+
// If there are capture groups, return the first one.
209+
if (matcher.groupCount() > 0) {
210+
return Optional.ofNullable(matcher.group(1));
211+
} else {
212+
// If there are no capture groups, return the entire match.
213+
return Optional.of(matcher.group(0));
214+
}
215+
}
216+
217+
return Optional.empty();
218+
}
219+
220+
private static ImmutableList<String> captureAllMatches(String target, String regex) {
221+
Pattern pattern = compileRegexPattern(regex);
222+
223+
Matcher matcher = pattern.matcher(target);
224+
ImmutableList.Builder<String> builder = ImmutableList.builder();
225+
226+
while (matcher.find()) {
227+
// If there are capture groups, return all of them. Otherwise, return the entire match.
228+
if (matcher.groupCount() > 0) {
229+
// Add all the capture groups to the result list.
230+
for (int i = 1; i <= matcher.groupCount(); i++) {
231+
String group = matcher.group(i);
232+
if (group != null) {
233+
builder.add(group);
234+
}
235+
}
236+
} else {
237+
builder.add(matcher.group(0));
238+
}
239+
}
240+
241+
return builder.build();
242+
}
243+
244+
private static ImmutableMap<String, String> captureAllNamedGroups(String target, String regex) {
245+
ImmutableMap.Builder<String, String> builder = ImmutableMap.builder();
246+
Pattern pattern = compileRegexPattern(regex);
247+
248+
Set<String> groupNames = pattern.namedGroups().keySet();
249+
if (groupNames.isEmpty()) {
250+
return builder.build();
251+
}
252+
253+
Matcher matcher = pattern.matcher(target);
254+
255+
while (matcher.find()) {
256+
257+
for (String groupName : groupNames) {
258+
String capturedValue = matcher.group(groupName);
259+
if (capturedValue != null) {
260+
builder.put(groupName, capturedValue);
261+
}
262+
}
263+
}
264+
return builder.buildOrThrow();
265+
}
266+
}

extensions/src/test/java/dev/cel/extensions/CelExtensionsTest.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,10 @@ public void getAllFunctionNames() {
178178
"base64.decode",
179179
"base64.encode",
180180
"flatten",
181-
"lists.range");
181+
"lists.range",
182+
"regex.replace",
183+
"regex.capture",
184+
"regex.captureAll",
185+
"regex.captureAllNamed");
182186
}
183187
}

0 commit comments

Comments
 (0)