-
Notifications
You must be signed in to change notification settings - Fork 13.7k
[mlir] Sanitize identifiers with leading symbol. #94795
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[mlir] Sanitize identifiers with leading symbol. #94795
Conversation
Presently, if name starts with a symbol it's converted to hex which may cause the result to be invalid by starting with a digit. Address this and add a small test.
@llvm/pr-subscribers-mlir-core Author: Will Dietz (dtzSiFive) ChangesPresently, if name starts with a symbol it's converted to hex which may cause the result to be invalid by starting with a digit. Address this and add a small test. Full diff: https://github.com/llvm/llvm-project/pull/94795.diff 3 Files Affected:
diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp
index 6a362afc52f25..2c43a6f15aa83 100644
--- a/mlir/lib/IR/AsmPrinter.cpp
+++ b/mlir/lib/IR/AsmPrinter.cpp
@@ -999,9 +999,13 @@ static StringRef sanitizeIdentifier(StringRef name, SmallString<16> &buffer,
bool allowTrailingDigit = true) {
assert(!name.empty() && "Shouldn't have an empty name here");
+ auto validChar = [&](char ch) {
+ return llvm::isAlnum(ch) || allowedPunctChars.contains(ch);
+ };
+
auto copyNameToBuffer = [&] {
for (char ch : name) {
- if (llvm::isAlnum(ch) || allowedPunctChars.contains(ch))
+ if (validChar(ch))
buffer.push_back(ch);
else if (ch == ' ')
buffer.push_back('_');
@@ -1013,7 +1017,7 @@ static StringRef sanitizeIdentifier(StringRef name, SmallString<16> &buffer,
// Check to see if this name is valid. If it starts with a digit, then it
// could conflict with the autogenerated numeric ID's, so add an underscore
// prefix to avoid problems.
- if (isdigit(name[0])) {
+ if (isdigit(name[0]) || (!validChar(name[0]) && name[0] != ' ')) {
buffer.push_back('_');
copyNameToBuffer();
return buffer;
@@ -1029,7 +1033,7 @@ static StringRef sanitizeIdentifier(StringRef name, SmallString<16> &buffer,
// Check to see that the name consists of only valid identifier characters.
for (char ch : name) {
- if (!llvm::isAlnum(ch) && !allowedPunctChars.contains(ch)) {
+ if (!validChar(ch)) {
copyNameToBuffer();
return buffer;
}
diff --git a/mlir/test/IR/print-attr-type-aliases.mlir b/mlir/test/IR/print-attr-type-aliases.mlir
index 162eacd002283..27c5a75addbb5 100644
--- a/mlir/test/IR/print-attr-type-aliases.mlir
+++ b/mlir/test/IR/print-attr-type-aliases.mlir
@@ -11,6 +11,9 @@
// CHECK-DAG: #_0_test_alias = "alias_test:prefixed_digit"
"test.op"() {alias_test = "alias_test:prefixed_digit"} : () -> ()
+// CHECK-DAG: #_25test = "alias_test:prefixed_symbol"
+"test.op"() {alias_test = "alias_test:prefixed_symbol"} : () -> ()
+
// CHECK-DAG: #test_alias_conflict0_ = "alias_test:sanitize_conflict_a"
// CHECK-DAG: #test_alias_conflict0_1 = "alias_test:sanitize_conflict_b"
"test.op"() {alias_test = ["alias_test:sanitize_conflict_a", "alias_test:sanitize_conflict_b"]} : () -> ()
diff --git a/mlir/test/lib/Dialect/Test/TestDialectInterfaces.cpp b/mlir/test/lib/Dialect/Test/TestDialectInterfaces.cpp
index a3a8913d5964c..64add8cef3698 100644
--- a/mlir/test/lib/Dialect/Test/TestDialectInterfaces.cpp
+++ b/mlir/test/lib/Dialect/Test/TestDialectInterfaces.cpp
@@ -188,6 +188,7 @@ struct TestOpAsmInterface : public OpAsmDialectInterface {
.Case("alias_test:dot_in_name", StringRef("test.alias"))
.Case("alias_test:trailing_digit", StringRef("test_alias0"))
.Case("alias_test:prefixed_digit", StringRef("0_test_alias"))
+ .Case("alias_test:prefixed_symbol", StringRef("%test"))
.Case("alias_test:sanitize_conflict_a",
StringRef("test_alias_conflict0"))
.Case("alias_test:sanitize_conflict_b",
|
@llvm/pr-subscribers-mlir Author: Will Dietz (dtzSiFive) ChangesPresently, if name starts with a symbol it's converted to hex which may cause the result to be invalid by starting with a digit. Address this and add a small test. Full diff: https://github.com/llvm/llvm-project/pull/94795.diff 3 Files Affected:
diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp
index 6a362afc52f25..2c43a6f15aa83 100644
--- a/mlir/lib/IR/AsmPrinter.cpp
+++ b/mlir/lib/IR/AsmPrinter.cpp
@@ -999,9 +999,13 @@ static StringRef sanitizeIdentifier(StringRef name, SmallString<16> &buffer,
bool allowTrailingDigit = true) {
assert(!name.empty() && "Shouldn't have an empty name here");
+ auto validChar = [&](char ch) {
+ return llvm::isAlnum(ch) || allowedPunctChars.contains(ch);
+ };
+
auto copyNameToBuffer = [&] {
for (char ch : name) {
- if (llvm::isAlnum(ch) || allowedPunctChars.contains(ch))
+ if (validChar(ch))
buffer.push_back(ch);
else if (ch == ' ')
buffer.push_back('_');
@@ -1013,7 +1017,7 @@ static StringRef sanitizeIdentifier(StringRef name, SmallString<16> &buffer,
// Check to see if this name is valid. If it starts with a digit, then it
// could conflict with the autogenerated numeric ID's, so add an underscore
// prefix to avoid problems.
- if (isdigit(name[0])) {
+ if (isdigit(name[0]) || (!validChar(name[0]) && name[0] != ' ')) {
buffer.push_back('_');
copyNameToBuffer();
return buffer;
@@ -1029,7 +1033,7 @@ static StringRef sanitizeIdentifier(StringRef name, SmallString<16> &buffer,
// Check to see that the name consists of only valid identifier characters.
for (char ch : name) {
- if (!llvm::isAlnum(ch) && !allowedPunctChars.contains(ch)) {
+ if (!validChar(ch)) {
copyNameToBuffer();
return buffer;
}
diff --git a/mlir/test/IR/print-attr-type-aliases.mlir b/mlir/test/IR/print-attr-type-aliases.mlir
index 162eacd002283..27c5a75addbb5 100644
--- a/mlir/test/IR/print-attr-type-aliases.mlir
+++ b/mlir/test/IR/print-attr-type-aliases.mlir
@@ -11,6 +11,9 @@
// CHECK-DAG: #_0_test_alias = "alias_test:prefixed_digit"
"test.op"() {alias_test = "alias_test:prefixed_digit"} : () -> ()
+// CHECK-DAG: #_25test = "alias_test:prefixed_symbol"
+"test.op"() {alias_test = "alias_test:prefixed_symbol"} : () -> ()
+
// CHECK-DAG: #test_alias_conflict0_ = "alias_test:sanitize_conflict_a"
// CHECK-DAG: #test_alias_conflict0_1 = "alias_test:sanitize_conflict_b"
"test.op"() {alias_test = ["alias_test:sanitize_conflict_a", "alias_test:sanitize_conflict_b"]} : () -> ()
diff --git a/mlir/test/lib/Dialect/Test/TestDialectInterfaces.cpp b/mlir/test/lib/Dialect/Test/TestDialectInterfaces.cpp
index a3a8913d5964c..64add8cef3698 100644
--- a/mlir/test/lib/Dialect/Test/TestDialectInterfaces.cpp
+++ b/mlir/test/lib/Dialect/Test/TestDialectInterfaces.cpp
@@ -188,6 +188,7 @@ struct TestOpAsmInterface : public OpAsmDialectInterface {
.Case("alias_test:dot_in_name", StringRef("test.alias"))
.Case("alias_test:trailing_digit", StringRef("test_alias0"))
.Case("alias_test:prefixed_digit", StringRef("0_test_alias"))
+ .Case("alias_test:prefixed_symbol", StringRef("%test"))
.Case("alias_test:sanitize_conflict_a",
StringRef("test_alias_conflict0"))
.Case("alias_test:sanitize_conflict_b",
|
Thanks! |
Presently, if name starts with a symbol it's converted to hex which may cause the result to be invalid by starting with a digit. Address this and add a small test. Co-authored-by: Will Dietz <w@wdtz.org>
Presently, if name starts with a symbol it's converted to hex which may cause the result to be invalid by starting with a digit.
Address this and add a small test.